[llvm] bc323b6 - AMDGPU: Stop implementing shouldCoalesce (#168988)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 21 07:11:00 PST 2025
Author: Matt Arsenault
Date: 2025-11-21T10:10:35-05:00
New Revision: bc323b609bd54747b8acda45d91a19f7a343a91b
URL: https://github.com/llvm/llvm-project/commit/bc323b609bd54747b8acda45d91a19f7a343a91b
DIFF: https://github.com/llvm/llvm-project/commit/bc323b609bd54747b8acda45d91a19f7a343a91b.diff
LOG: AMDGPU: Stop implementing shouldCoalesce (#168988)
Use the default, which freely coalesces anything it can.
This mostly shows improvements, with a handful of regressions.
The main concern is whether introducing wider registers makes it more
likely that register usage gets pushed up to the next occupancy tier.
Added:
llvm/test/CodeGen/AMDGPU/no-limit-coalesce.mir
Modified:
llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
llvm/lib/Target/AMDGPU/SIRegisterInfo.h
llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll
llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.128bit.ll
llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.256bit.ll
llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.512bit.ll
llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-nondeterminism.ll
llvm/test/CodeGen/AMDGPU/freeze.ll
llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll
llvm/test/CodeGen/AMDGPU/lds-misaligned-bug.ll
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.smfmac.gfx950.ll
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.writelane.ll
llvm/test/CodeGen/AMDGPU/load-constant-i1.ll
llvm/test/CodeGen/AMDGPU/load-global-i16.ll
llvm/test/CodeGen/AMDGPU/load-local-i16.ll
llvm/test/CodeGen/AMDGPU/merge-stores.ll
llvm/test/CodeGen/AMDGPU/mfma-no-register-aliasing.ll
llvm/test/CodeGen/AMDGPU/schedule-amdgpu-trackers.ll
llvm/test/CodeGen/AMDGPU/scratch-simple.ll
llvm/test/CodeGen/AMDGPU/shufflevector.v2f32.v4f32.ll
llvm/test/CodeGen/AMDGPU/shufflevector.v2f32.v8f32.ll
llvm/test/CodeGen/AMDGPU/shufflevector.v2i32.v4i32.ll
llvm/test/CodeGen/AMDGPU/shufflevector.v2i32.v8i32.ll
llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v2i64.ll
llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v3i64.ll
llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v4i64.ll
llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v8i64.ll
llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v2p0.ll
llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v3p0.ll
llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v4p0.ll
llvm/test/CodeGen/AMDGPU/shufflevector.v2p3.v4p3.ll
llvm/test/CodeGen/AMDGPU/shufflevector.v2p3.v8p3.ll
llvm/test/CodeGen/AMDGPU/shufflevector.v3f32.v2f32.ll
llvm/test/CodeGen/AMDGPU/shufflevector.v3f32.v3f32.ll
llvm/test/CodeGen/AMDGPU/shufflevector.v3f32.v4f32.ll
llvm/test/CodeGen/AMDGPU/shufflevector.v3i32.v2i32.ll
llvm/test/CodeGen/AMDGPU/shufflevector.v3i32.v3i32.ll
llvm/test/CodeGen/AMDGPU/shufflevector.v3i32.v4i32.ll
llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v2i64.ll
llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v3i64.ll
llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v4i64.ll
llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v2p0.ll
llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v3p0.ll
llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v4p0.ll
llvm/test/CodeGen/AMDGPU/shufflevector.v3p3.v2p3.ll
llvm/test/CodeGen/AMDGPU/shufflevector.v3p3.v3p3.ll
llvm/test/CodeGen/AMDGPU/shufflevector.v3p3.v4p3.ll
llvm/test/CodeGen/AMDGPU/shufflevector.v4f32.v2f32.ll
llvm/test/CodeGen/AMDGPU/shufflevector.v4f32.v3f32.ll
llvm/test/CodeGen/AMDGPU/shufflevector.v4f32.v4f32.ll
llvm/test/CodeGen/AMDGPU/shufflevector.v4i32.v2i32.ll
llvm/test/CodeGen/AMDGPU/shufflevector.v4i32.v3i32.ll
llvm/test/CodeGen/AMDGPU/shufflevector.v4i32.v4i32.ll
llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v2i64.ll
llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v3i64.ll
llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v4i64.ll
llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v2p0.ll
llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v3p0.ll
llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v4p0.ll
llvm/test/CodeGen/AMDGPU/shufflevector.v4p3.v2p3.ll
llvm/test/CodeGen/AMDGPU/shufflevector.v4p3.v3p3.ll
llvm/test/CodeGen/AMDGPU/shufflevector.v4p3.v4p3.ll
llvm/test/CodeGen/AMDGPU/vector-legalizer-divergence.ll
llvm/test/CodeGen/AMDGPU/widen-vselect-and-mask.ll
Removed:
llvm/test/CodeGen/AMDGPU/limit-coalesce.mir
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 5484fab3efdcc..ad3828fba2187 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -3755,20 +3755,6 @@ bool SIRegisterInfo::isAGPR(const MachineRegisterInfo &MRI,
return RC && isAGPRClass(RC);
}
-bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI,
- const TargetRegisterClass *SrcRC,
- unsigned SubReg,
- const TargetRegisterClass *DstRC,
- unsigned DstSubReg,
- const TargetRegisterClass *NewRC,
- LiveIntervals &LIS) const {
- // TODO: This should be more aggressive, but be more cautious with very wide
- // tuples.
- unsigned NewSize = getRegSizeInBits(*NewRC);
- return NewSize <= 128 || NewSize <= getRegSizeInBits(*SrcRC) ||
- NewSize <= getRegSizeInBits(*DstRC);
-}
-
unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const {
unsigned MinOcc = ST.getOccupancyWithWorkGroupSizes(MF).first;
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
index bb8a80f811d4c..2e2916f68f584 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -346,14 +346,6 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
ArrayRef<int16_t> getRegSplitParts(const TargetRegisterClass *RC,
unsigned EltSize) const;
- bool shouldCoalesce(MachineInstr *MI,
- const TargetRegisterClass *SrcRC,
- unsigned SubReg,
- const TargetRegisterClass *DstRC,
- unsigned DstSubReg,
- const TargetRegisterClass *NewRC,
- LiveIntervals &LIS) const override;
-
unsigned getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const override;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll
index 34e4931674cce..b12fa0a51046d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll
@@ -2414,63 +2414,62 @@ define i256 @v_mul_i256(i256 %num, i256 %den) {
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_mad_u64_u32 v[16:17], s[4:5], v0, v14, 0
+; GFX7-NEXT: v_mul_lo_u32 v28, v4, v11
; GFX7-NEXT: v_mul_lo_u32 v29, v3, v12
-; GFX7-NEXT: v_mul_lo_u32 v30, v2, v13
; GFX7-NEXT: v_mad_u64_u32 v[18:19], s[4:5], v1, v13, v[16:17]
+; GFX7-NEXT: v_mul_lo_u32 v30, v2, v13
+; GFX7-NEXT: v_mul_lo_u32 v27, v5, v10
; GFX7-NEXT: v_mad_u64_u32 v[16:17], s[4:5], v2, v12, v[18:19]
-; GFX7-NEXT: v_mad_u64_u32 v[18:19], s[4:5], v0, v10, 0
-; GFX7-NEXT: v_mad_u64_u32 v[20:21], s[4:5], v3, v11, v[16:17]
-; GFX7-NEXT: v_mad_u64_u32 v[16:17], s[4:5], v1, v9, v[18:19]
-; GFX7-NEXT: v_mad_u64_u32 v[18:19], s[6:7], v4, v10, v[20:21]
-; GFX7-NEXT: v_mad_u64_u32 v[20:21], s[6:7], v0, v12, 0
-; GFX7-NEXT: v_mad_u64_u32 v[22:23], s[6:7], v5, v9, v[18:19]
-; GFX7-NEXT: v_mad_u64_u32 v[24:25], s[6:7], v1, v11, v[20:21]
-; GFX7-NEXT: v_cndmask_b32_e64 v20, 0, 1, s[4:5]
-; GFX7-NEXT: v_mad_u64_u32 v[18:19], vcc, v2, v8, v[16:17]
-; GFX7-NEXT: v_addc_u32_e32 v28, vcc, 0, v20, vcc
-; GFX7-NEXT: v_mad_u64_u32 v[16:17], vcc, v2, v10, v[24:25]
-; GFX7-NEXT: v_cndmask_b32_e64 v20, 0, 1, s[6:7]
-; GFX7-NEXT: v_addc_u32_e32 v20, vcc, 0, v20, vcc
-; GFX7-NEXT: v_mad_u64_u32 v[26:27], s[8:9], v6, v8, v[22:23]
-; GFX7-NEXT: v_mad_u64_u32 v[22:23], vcc, v3, v9, v[16:17]
-; GFX7-NEXT: v_addc_u32_e32 v16, vcc, 0, v20, vcc
-; GFX7-NEXT: v_mad_u64_u32 v[20:21], vcc, v4, v8, v[22:23]
-; GFX7-NEXT: v_mov_b32_e32 v22, v26
-; GFX7-NEXT: v_addc_u32_e32 v23, vcc, 0, v16, vcc
-; GFX7-NEXT: v_mad_u64_u32 v[16:17], vcc, v0, v13, v[21:22]
-; GFX7-NEXT: v_mad_u64_u32 v[21:22], s[12:13], v0, v11, v[19:20]
-; GFX7-NEXT: v_mad_u64_u32 v[19:20], s[4:5], v1, v12, v[16:17]
-; GFX7-NEXT: v_mul_lo_u32 v26, v4, v11
-; GFX7-NEXT: v_mul_lo_u32 v25, v5, v10
-; GFX7-NEXT: v_mad_u64_u32 v[16:17], s[6:7], v2, v11, v[19:20]
-; GFX7-NEXT: v_mul_lo_u32 v24, v6, v9
-; GFX7-NEXT: v_mad_u64_u32 v[11:12], s[8:9], v3, v10, v[16:17]
-; GFX7-NEXT: v_mad_u64_u32 v[16:17], s[10:11], v4, v9, v[11:12]
-; GFX7-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[12:13]
-; GFX7-NEXT: v_mad_u64_u32 v[11:12], s[12:13], v1, v10, v[21:22]
-; GFX7-NEXT: v_addc_u32_e64 v4, s[12:13], 0, v4, s[12:13]
-; GFX7-NEXT: v_mad_u64_u32 v[19:20], s[12:13], v2, v9, v[11:12]
-; GFX7-NEXT: v_mul_lo_u32 v10, v1, v14
-; GFX7-NEXT: v_mad_u64_u32 v[13:14], s[14:15], v5, v8, v[16:17]
+; GFX7-NEXT: v_mad_u64_u32 v[18:19], s[4:5], v3, v11, v[16:17]
+; GFX7-NEXT: v_mad_u64_u32 v[16:17], s[4:5], v0, v10, 0
+; GFX7-NEXT: v_mad_u64_u32 v[20:21], s[4:5], v4, v10, v[18:19]
+; GFX7-NEXT: v_mad_u64_u32 v[22:23], s[4:5], v1, v9, v[16:17]
+; GFX7-NEXT: v_mad_u64_u32 v[16:17], s[6:7], v5, v9, v[20:21]
+; GFX7-NEXT: v_cndmask_b32_e64 v26, 0, 1, s[4:5]
+; GFX7-NEXT: v_mad_u64_u32 v[18:19], vcc, v2, v8, v[22:23]
+; GFX7-NEXT: v_mad_u64_u32 v[22:23], s[4:5], v6, v8, v[16:17]
+; GFX7-NEXT: v_mad_u64_u32 v[16:17], s[4:5], v0, v12, 0
+; GFX7-NEXT: v_mad_u64_u32 v[20:21], s[4:5], v1, v11, v[16:17]
+; GFX7-NEXT: v_cndmask_b32_e64 v24, 0, 1, s[4:5]
+; GFX7-NEXT: v_mad_u64_u32 v[16:17], s[4:5], v2, v10, v[20:21]
+; GFX7-NEXT: v_addc_u32_e64 v20, s[4:5], 0, v24, s[4:5]
+; GFX7-NEXT: v_mad_u64_u32 v[24:25], s[4:5], v3, v9, v[16:17]
+; GFX7-NEXT: v_addc_u32_e64 v16, s[4:5], 0, v20, s[4:5]
+; GFX7-NEXT: v_mad_u64_u32 v[20:21], s[4:5], v4, v8, v[24:25]
+; GFX7-NEXT: v_addc_u32_e64 v24, s[4:5], 0, v16, s[4:5]
+; GFX7-NEXT: v_mad_u64_u32 v[16:17], s[4:5], v0, v13, v[21:22]
+; GFX7-NEXT: v_mul_lo_u32 v25, v6, v9
+; GFX7-NEXT: v_mad_u64_u32 v[21:22], s[6:7], v1, v12, v[16:17]
+; GFX7-NEXT: v_mad_u64_u32 v[16:17], s[8:9], v2, v11, v[21:22]
+; GFX7-NEXT: v_mad_u64_u32 v[21:22], s[10:11], v3, v10, v[16:17]
+; GFX7-NEXT: v_mad_u64_u32 v[16:17], s[14:15], v0, v11, v[19:20]
+; GFX7-NEXT: v_mad_u64_u32 v[12:13], s[12:13], v4, v9, v[21:22]
+; GFX7-NEXT: v_addc_u32_e32 v4, vcc, 0, v26, vcc
+; GFX7-NEXT: v_mad_u64_u32 v[19:20], vcc, v1, v10, v[16:17]
+; GFX7-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[14:15]
+; GFX7-NEXT: v_addc_u32_e32 v6, vcc, 0, v6, vcc
+; GFX7-NEXT: v_mad_u64_u32 v[10:11], vcc, v2, v9, v[19:20]
; GFX7-NEXT: v_mad_u64_u32 v[16:17], s[16:17], v0, v8, 0
-; GFX7-NEXT: v_addc_u32_e64 v2, s[12:13], 0, v4, s[12:13]
-; GFX7-NEXT: v_mad_u64_u32 v[21:22], s[12:13], v3, v8, v[19:20]
-; GFX7-NEXT: v_addc_u32_e64 v5, s[12:13], 0, v2, s[12:13]
-; GFX7-NEXT: v_mad_u64_u32 v[2:3], s[12:13], v0, v9, v[17:18]
-; GFX7-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[12:13]
+; GFX7-NEXT: v_addc_u32_e32 v2, vcc, 0, v6, vcc
+; GFX7-NEXT: v_mad_u64_u32 v[19:20], vcc, v3, v8, v[10:11]
+; GFX7-NEXT: v_mad_u64_u32 v[21:22], s[14:15], v5, v8, v[12:13]
+; GFX7-NEXT: v_addc_u32_e32 v5, vcc, 0, v2, vcc
+; GFX7-NEXT: v_mad_u64_u32 v[2:3], s[16:17], v0, v9, v[17:18]
+; GFX7-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[16:17]
; GFX7-NEXT: v_mul_lo_u32 v0, v0, v15
-; GFX7-NEXT: v_mad_u64_u32 v[11:12], s[12:13], v1, v8, v[2:3]
-; GFX7-NEXT: v_addc_u32_e64 v3, s[12:13], v4, v21, s[12:13]
-; GFX7-NEXT: v_addc_u32_e64 v4, s[12:13], v28, v22, s[12:13]
-; GFX7-NEXT: v_addc_u32_e64 v5, s[12:13], v5, v13, s[12:13]
-; GFX7-NEXT: v_addc_u32_e64 v6, s[12:13], v23, v14, s[12:13]
-; GFX7-NEXT: v_addc_u32_e64 v0, s[12:13], v27, v0, s[12:13]
-; GFX7-NEXT: v_addc_u32_e64 v0, s[12:13], v0, v10, s[14:15]
-; GFX7-NEXT: v_addc_u32_e64 v0, s[10:11], v0, v30, s[10:11]
-; GFX7-NEXT: v_addc_u32_e64 v0, s[8:9], v0, v29, s[8:9]
-; GFX7-NEXT: v_addc_u32_e64 v0, s[6:7], v0, v26, s[6:7]
-; GFX7-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v25, s[4:5]
-; GFX7-NEXT: v_addc_u32_e32 v0, vcc, v0, v24, vcc
+; GFX7-NEXT: v_mad_u64_u32 v[11:12], vcc, v1, v8, v[2:3]
+; GFX7-NEXT: v_addc_u32_e32 v3, vcc, v6, v19, vcc
+; GFX7-NEXT: v_mul_lo_u32 v10, v1, v14
+; GFX7-NEXT: v_addc_u32_e32 v4, vcc, v4, v20, vcc
+; GFX7-NEXT: v_addc_u32_e32 v5, vcc, v5, v21, vcc
+; GFX7-NEXT: v_addc_u32_e32 v6, vcc, v24, v22, vcc
+; GFX7-NEXT: v_addc_u32_e32 v0, vcc, v23, v0, vcc
+; GFX7-NEXT: v_addc_u32_e64 v0, vcc, v0, v10, s[14:15]
+; GFX7-NEXT: v_addc_u32_e64 v0, vcc, v0, v30, s[12:13]
+; GFX7-NEXT: v_addc_u32_e64 v0, vcc, v0, v29, s[10:11]
+; GFX7-NEXT: v_addc_u32_e64 v0, vcc, v0, v28, s[8:9]
+; GFX7-NEXT: v_addc_u32_e64 v0, vcc, v0, v27, s[6:7]
+; GFX7-NEXT: v_addc_u32_e64 v0, vcc, v0, v25, s[4:5]
; GFX7-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v7, v8, v[0:1]
; GFX7-NEXT: v_mov_b32_e32 v0, v16
; GFX7-NEXT: v_mov_b32_e32 v1, v11
@@ -2482,63 +2481,62 @@ define i256 @v_mul_i256(i256 %num, i256 %den) {
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mad_u64_u32 v[16:17], s[4:5], v0, v14, 0
+; GFX8-NEXT: v_mul_lo_u32 v28, v4, v11
; GFX8-NEXT: v_mul_lo_u32 v29, v3, v12
-; GFX8-NEXT: v_mul_lo_u32 v30, v2, v13
; GFX8-NEXT: v_mad_u64_u32 v[18:19], s[4:5], v1, v13, v[16:17]
+; GFX8-NEXT: v_mul_lo_u32 v30, v2, v13
+; GFX8-NEXT: v_mul_lo_u32 v27, v5, v10
; GFX8-NEXT: v_mad_u64_u32 v[16:17], s[4:5], v2, v12, v[18:19]
-; GFX8-NEXT: v_mad_u64_u32 v[18:19], s[4:5], v0, v10, 0
-; GFX8-NEXT: v_mad_u64_u32 v[20:21], s[4:5], v3, v11, v[16:17]
-; GFX8-NEXT: v_mad_u64_u32 v[16:17], s[4:5], v1, v9, v[18:19]
-; GFX8-NEXT: v_mad_u64_u32 v[18:19], s[6:7], v4, v10, v[20:21]
-; GFX8-NEXT: v_mad_u64_u32 v[20:21], s[6:7], v0, v12, 0
-; GFX8-NEXT: v_mad_u64_u32 v[22:23], s[6:7], v5, v9, v[18:19]
-; GFX8-NEXT: v_mad_u64_u32 v[24:25], s[6:7], v1, v11, v[20:21]
-; GFX8-NEXT: v_cndmask_b32_e64 v20, 0, 1, s[4:5]
-; GFX8-NEXT: v_mad_u64_u32 v[18:19], vcc, v2, v8, v[16:17]
-; GFX8-NEXT: v_addc_u32_e32 v28, vcc, 0, v20, vcc
-; GFX8-NEXT: v_mad_u64_u32 v[16:17], vcc, v2, v10, v[24:25]
-; GFX8-NEXT: v_cndmask_b32_e64 v20, 0, 1, s[6:7]
-; GFX8-NEXT: v_addc_u32_e32 v20, vcc, 0, v20, vcc
-; GFX8-NEXT: v_mad_u64_u32 v[26:27], s[8:9], v6, v8, v[22:23]
-; GFX8-NEXT: v_mad_u64_u32 v[22:23], vcc, v3, v9, v[16:17]
-; GFX8-NEXT: v_addc_u32_e32 v16, vcc, 0, v20, vcc
-; GFX8-NEXT: v_mad_u64_u32 v[20:21], vcc, v4, v8, v[22:23]
-; GFX8-NEXT: v_mov_b32_e32 v22, v26
-; GFX8-NEXT: v_addc_u32_e32 v23, vcc, 0, v16, vcc
-; GFX8-NEXT: v_mad_u64_u32 v[16:17], vcc, v0, v13, v[21:22]
-; GFX8-NEXT: v_mad_u64_u32 v[21:22], s[12:13], v0, v11, v[19:20]
-; GFX8-NEXT: v_mad_u64_u32 v[19:20], s[4:5], v1, v12, v[16:17]
-; GFX8-NEXT: v_mul_lo_u32 v26, v4, v11
-; GFX8-NEXT: v_mul_lo_u32 v25, v5, v10
-; GFX8-NEXT: v_mad_u64_u32 v[16:17], s[6:7], v2, v11, v[19:20]
-; GFX8-NEXT: v_mul_lo_u32 v24, v6, v9
-; GFX8-NEXT: v_mad_u64_u32 v[11:12], s[8:9], v3, v10, v[16:17]
-; GFX8-NEXT: v_mad_u64_u32 v[16:17], s[10:11], v4, v9, v[11:12]
-; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[12:13]
-; GFX8-NEXT: v_mad_u64_u32 v[11:12], s[12:13], v1, v10, v[21:22]
-; GFX8-NEXT: v_addc_u32_e64 v4, s[12:13], 0, v4, s[12:13]
-; GFX8-NEXT: v_mad_u64_u32 v[19:20], s[12:13], v2, v9, v[11:12]
-; GFX8-NEXT: v_mul_lo_u32 v10, v1, v14
-; GFX8-NEXT: v_mad_u64_u32 v[13:14], s[14:15], v5, v8, v[16:17]
+; GFX8-NEXT: v_mad_u64_u32 v[18:19], s[4:5], v3, v11, v[16:17]
+; GFX8-NEXT: v_mad_u64_u32 v[16:17], s[4:5], v0, v10, 0
+; GFX8-NEXT: v_mad_u64_u32 v[20:21], s[4:5], v4, v10, v[18:19]
+; GFX8-NEXT: v_mad_u64_u32 v[22:23], s[4:5], v1, v9, v[16:17]
+; GFX8-NEXT: v_mad_u64_u32 v[16:17], s[6:7], v5, v9, v[20:21]
+; GFX8-NEXT: v_cndmask_b32_e64 v26, 0, 1, s[4:5]
+; GFX8-NEXT: v_mad_u64_u32 v[18:19], vcc, v2, v8, v[22:23]
+; GFX8-NEXT: v_mad_u64_u32 v[22:23], s[4:5], v6, v8, v[16:17]
+; GFX8-NEXT: v_mad_u64_u32 v[16:17], s[4:5], v0, v12, 0
+; GFX8-NEXT: v_mad_u64_u32 v[20:21], s[4:5], v1, v11, v[16:17]
+; GFX8-NEXT: v_cndmask_b32_e64 v24, 0, 1, s[4:5]
+; GFX8-NEXT: v_mad_u64_u32 v[16:17], s[4:5], v2, v10, v[20:21]
+; GFX8-NEXT: v_addc_u32_e64 v20, s[4:5], 0, v24, s[4:5]
+; GFX8-NEXT: v_mad_u64_u32 v[24:25], s[4:5], v3, v9, v[16:17]
+; GFX8-NEXT: v_addc_u32_e64 v16, s[4:5], 0, v20, s[4:5]
+; GFX8-NEXT: v_mad_u64_u32 v[20:21], s[4:5], v4, v8, v[24:25]
+; GFX8-NEXT: v_addc_u32_e64 v24, s[4:5], 0, v16, s[4:5]
+; GFX8-NEXT: v_mad_u64_u32 v[16:17], s[4:5], v0, v13, v[21:22]
+; GFX8-NEXT: v_mul_lo_u32 v25, v6, v9
+; GFX8-NEXT: v_mad_u64_u32 v[21:22], s[6:7], v1, v12, v[16:17]
+; GFX8-NEXT: v_mad_u64_u32 v[16:17], s[8:9], v2, v11, v[21:22]
+; GFX8-NEXT: v_mad_u64_u32 v[21:22], s[10:11], v3, v10, v[16:17]
+; GFX8-NEXT: v_mad_u64_u32 v[16:17], s[14:15], v0, v11, v[19:20]
+; GFX8-NEXT: v_mad_u64_u32 v[12:13], s[12:13], v4, v9, v[21:22]
+; GFX8-NEXT: v_addc_u32_e32 v4, vcc, 0, v26, vcc
+; GFX8-NEXT: v_mad_u64_u32 v[19:20], vcc, v1, v10, v[16:17]
+; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[14:15]
+; GFX8-NEXT: v_addc_u32_e32 v6, vcc, 0, v6, vcc
+; GFX8-NEXT: v_mad_u64_u32 v[10:11], vcc, v2, v9, v[19:20]
; GFX8-NEXT: v_mad_u64_u32 v[16:17], s[16:17], v0, v8, 0
-; GFX8-NEXT: v_addc_u32_e64 v2, s[12:13], 0, v4, s[12:13]
-; GFX8-NEXT: v_mad_u64_u32 v[21:22], s[12:13], v3, v8, v[19:20]
-; GFX8-NEXT: v_addc_u32_e64 v5, s[12:13], 0, v2, s[12:13]
-; GFX8-NEXT: v_mad_u64_u32 v[2:3], s[12:13], v0, v9, v[17:18]
-; GFX8-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[12:13]
+; GFX8-NEXT: v_addc_u32_e32 v2, vcc, 0, v6, vcc
+; GFX8-NEXT: v_mad_u64_u32 v[19:20], vcc, v3, v8, v[10:11]
+; GFX8-NEXT: v_mad_u64_u32 v[21:22], s[14:15], v5, v8, v[12:13]
+; GFX8-NEXT: v_addc_u32_e32 v5, vcc, 0, v2, vcc
+; GFX8-NEXT: v_mad_u64_u32 v[2:3], s[16:17], v0, v9, v[17:18]
+; GFX8-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[16:17]
; GFX8-NEXT: v_mul_lo_u32 v0, v0, v15
-; GFX8-NEXT: v_mad_u64_u32 v[11:12], s[12:13], v1, v8, v[2:3]
-; GFX8-NEXT: v_addc_u32_e64 v3, s[12:13], v4, v21, s[12:13]
-; GFX8-NEXT: v_addc_u32_e64 v4, s[12:13], v28, v22, s[12:13]
-; GFX8-NEXT: v_addc_u32_e64 v5, s[12:13], v5, v13, s[12:13]
-; GFX8-NEXT: v_addc_u32_e64 v6, s[12:13], v23, v14, s[12:13]
-; GFX8-NEXT: v_addc_u32_e64 v0, s[12:13], v27, v0, s[12:13]
-; GFX8-NEXT: v_addc_u32_e64 v0, s[12:13], v0, v10, s[14:15]
-; GFX8-NEXT: v_addc_u32_e64 v0, s[10:11], v0, v30, s[10:11]
-; GFX8-NEXT: v_addc_u32_e64 v0, s[8:9], v0, v29, s[8:9]
-; GFX8-NEXT: v_addc_u32_e64 v0, s[6:7], v0, v26, s[6:7]
-; GFX8-NEXT: v_addc_u32_e64 v0, s[4:5], v0, v25, s[4:5]
-; GFX8-NEXT: v_addc_u32_e32 v0, vcc, v0, v24, vcc
+; GFX8-NEXT: v_mad_u64_u32 v[11:12], vcc, v1, v8, v[2:3]
+; GFX8-NEXT: v_addc_u32_e32 v3, vcc, v6, v19, vcc
+; GFX8-NEXT: v_mul_lo_u32 v10, v1, v14
+; GFX8-NEXT: v_addc_u32_e32 v4, vcc, v4, v20, vcc
+; GFX8-NEXT: v_addc_u32_e32 v5, vcc, v5, v21, vcc
+; GFX8-NEXT: v_addc_u32_e32 v6, vcc, v24, v22, vcc
+; GFX8-NEXT: v_addc_u32_e32 v0, vcc, v23, v0, vcc
+; GFX8-NEXT: v_addc_u32_e64 v0, vcc, v0, v10, s[14:15]
+; GFX8-NEXT: v_addc_u32_e64 v0, vcc, v0, v30, s[12:13]
+; GFX8-NEXT: v_addc_u32_e64 v0, vcc, v0, v29, s[10:11]
+; GFX8-NEXT: v_addc_u32_e64 v0, vcc, v0, v28, s[8:9]
+; GFX8-NEXT: v_addc_u32_e64 v0, vcc, v0, v27, s[6:7]
+; GFX8-NEXT: v_addc_u32_e64 v0, vcc, v0, v25, s[4:5]
; GFX8-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v7, v8, v[0:1]
; GFX8-NEXT: v_mov_b32_e32 v0, v16
; GFX8-NEXT: v_mov_b32_e32 v1, v11
@@ -2550,63 +2548,62 @@ define i256 @v_mul_i256(i256 %num, i256 %den) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mad_u64_u32 v[16:17], s[4:5], v0, v14, 0
+; GFX9-NEXT: v_mul_lo_u32 v28, v4, v11
; GFX9-NEXT: v_mul_lo_u32 v29, v3, v12
-; GFX9-NEXT: v_mul_lo_u32 v30, v2, v13
; GFX9-NEXT: v_mad_u64_u32 v[18:19], s[4:5], v1, v13, v[16:17]
+; GFX9-NEXT: v_mul_lo_u32 v30, v2, v13
+; GFX9-NEXT: v_mul_lo_u32 v27, v5, v10
; GFX9-NEXT: v_mad_u64_u32 v[16:17], s[4:5], v2, v12, v[18:19]
-; GFX9-NEXT: v_mad_u64_u32 v[18:19], s[4:5], v0, v10, 0
-; GFX9-NEXT: v_mad_u64_u32 v[20:21], s[4:5], v3, v11, v[16:17]
-; GFX9-NEXT: v_mad_u64_u32 v[16:17], s[4:5], v1, v9, v[18:19]
-; GFX9-NEXT: v_mad_u64_u32 v[18:19], s[6:7], v4, v10, v[20:21]
-; GFX9-NEXT: v_mad_u64_u32 v[20:21], s[6:7], v0, v12, 0
-; GFX9-NEXT: v_mad_u64_u32 v[22:23], s[6:7], v5, v9, v[18:19]
-; GFX9-NEXT: v_mad_u64_u32 v[24:25], s[6:7], v1, v11, v[20:21]
-; GFX9-NEXT: v_cndmask_b32_e64 v20, 0, 1, s[4:5]
-; GFX9-NEXT: v_mad_u64_u32 v[18:19], vcc, v2, v8, v[16:17]
-; GFX9-NEXT: v_addc_co_u32_e32 v28, vcc, 0, v20, vcc
-; GFX9-NEXT: v_mad_u64_u32 v[16:17], vcc, v2, v10, v[24:25]
-; GFX9-NEXT: v_cndmask_b32_e64 v20, 0, 1, s[6:7]
-; GFX9-NEXT: v_addc_co_u32_e32 v20, vcc, 0, v20, vcc
-; GFX9-NEXT: v_mad_u64_u32 v[26:27], s[8:9], v6, v8, v[22:23]
-; GFX9-NEXT: v_mad_u64_u32 v[22:23], vcc, v3, v9, v[16:17]
-; GFX9-NEXT: v_addc_co_u32_e32 v16, vcc, 0, v20, vcc
-; GFX9-NEXT: v_mad_u64_u32 v[20:21], vcc, v4, v8, v[22:23]
-; GFX9-NEXT: v_mov_b32_e32 v22, v26
-; GFX9-NEXT: v_addc_co_u32_e32 v23, vcc, 0, v16, vcc
-; GFX9-NEXT: v_mad_u64_u32 v[16:17], vcc, v0, v13, v[21:22]
-; GFX9-NEXT: v_mad_u64_u32 v[21:22], s[12:13], v0, v11, v[19:20]
-; GFX9-NEXT: v_mad_u64_u32 v[19:20], s[4:5], v1, v12, v[16:17]
-; GFX9-NEXT: v_mul_lo_u32 v26, v4, v11
-; GFX9-NEXT: v_mul_lo_u32 v25, v5, v10
-; GFX9-NEXT: v_mad_u64_u32 v[16:17], s[6:7], v2, v11, v[19:20]
-; GFX9-NEXT: v_mul_lo_u32 v24, v6, v9
-; GFX9-NEXT: v_mad_u64_u32 v[11:12], s[8:9], v3, v10, v[16:17]
-; GFX9-NEXT: v_mad_u64_u32 v[16:17], s[10:11], v4, v9, v[11:12]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[12:13]
-; GFX9-NEXT: v_mad_u64_u32 v[11:12], s[12:13], v1, v10, v[21:22]
-; GFX9-NEXT: v_addc_co_u32_e64 v4, s[12:13], 0, v4, s[12:13]
-; GFX9-NEXT: v_mad_u64_u32 v[19:20], s[12:13], v2, v9, v[11:12]
-; GFX9-NEXT: v_mul_lo_u32 v10, v1, v14
-; GFX9-NEXT: v_mad_u64_u32 v[13:14], s[14:15], v5, v8, v[16:17]
+; GFX9-NEXT: v_mad_u64_u32 v[18:19], s[4:5], v3, v11, v[16:17]
+; GFX9-NEXT: v_mad_u64_u32 v[16:17], s[4:5], v0, v10, 0
+; GFX9-NEXT: v_mad_u64_u32 v[20:21], s[4:5], v4, v10, v[18:19]
+; GFX9-NEXT: v_mad_u64_u32 v[22:23], s[4:5], v1, v9, v[16:17]
+; GFX9-NEXT: v_mad_u64_u32 v[16:17], s[6:7], v5, v9, v[20:21]
+; GFX9-NEXT: v_cndmask_b32_e64 v26, 0, 1, s[4:5]
+; GFX9-NEXT: v_mad_u64_u32 v[18:19], vcc, v2, v8, v[22:23]
+; GFX9-NEXT: v_mad_u64_u32 v[22:23], s[4:5], v6, v8, v[16:17]
+; GFX9-NEXT: v_mad_u64_u32 v[16:17], s[4:5], v0, v12, 0
+; GFX9-NEXT: v_mad_u64_u32 v[20:21], s[4:5], v1, v11, v[16:17]
+; GFX9-NEXT: v_cndmask_b32_e64 v24, 0, 1, s[4:5]
+; GFX9-NEXT: v_mad_u64_u32 v[16:17], s[4:5], v2, v10, v[20:21]
+; GFX9-NEXT: v_addc_co_u32_e64 v20, s[4:5], 0, v24, s[4:5]
+; GFX9-NEXT: v_mad_u64_u32 v[24:25], s[4:5], v3, v9, v[16:17]
+; GFX9-NEXT: v_addc_co_u32_e64 v16, s[4:5], 0, v20, s[4:5]
+; GFX9-NEXT: v_mad_u64_u32 v[20:21], s[4:5], v4, v8, v[24:25]
+; GFX9-NEXT: v_addc_co_u32_e64 v24, s[4:5], 0, v16, s[4:5]
+; GFX9-NEXT: v_mad_u64_u32 v[16:17], s[4:5], v0, v13, v[21:22]
+; GFX9-NEXT: v_mul_lo_u32 v25, v6, v9
+; GFX9-NEXT: v_mad_u64_u32 v[21:22], s[6:7], v1, v12, v[16:17]
+; GFX9-NEXT: v_mad_u64_u32 v[16:17], s[8:9], v2, v11, v[21:22]
+; GFX9-NEXT: v_mad_u64_u32 v[21:22], s[10:11], v3, v10, v[16:17]
+; GFX9-NEXT: v_mad_u64_u32 v[16:17], s[14:15], v0, v11, v[19:20]
+; GFX9-NEXT: v_mad_u64_u32 v[12:13], s[12:13], v4, v9, v[21:22]
+; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, 0, v26, vcc
+; GFX9-NEXT: v_mad_u64_u32 v[19:20], vcc, v1, v10, v[16:17]
+; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[14:15]
+; GFX9-NEXT: v_addc_co_u32_e32 v6, vcc, 0, v6, vcc
+; GFX9-NEXT: v_mad_u64_u32 v[10:11], vcc, v2, v9, v[19:20]
; GFX9-NEXT: v_mad_u64_u32 v[16:17], s[16:17], v0, v8, 0
-; GFX9-NEXT: v_addc_co_u32_e64 v2, s[12:13], 0, v4, s[12:13]
-; GFX9-NEXT: v_mad_u64_u32 v[21:22], s[12:13], v3, v8, v[19:20]
-; GFX9-NEXT: v_addc_co_u32_e64 v5, s[12:13], 0, v2, s[12:13]
-; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[12:13], v0, v9, v[17:18]
-; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[12:13]
+; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, 0, v6, vcc
+; GFX9-NEXT: v_mad_u64_u32 v[19:20], vcc, v3, v8, v[10:11]
+; GFX9-NEXT: v_mad_u64_u32 v[21:22], s[14:15], v5, v8, v[12:13]
+; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v2, vcc
+; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[16:17], v0, v9, v[17:18]
+; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[16:17]
; GFX9-NEXT: v_mul_lo_u32 v0, v0, v15
-; GFX9-NEXT: v_mad_u64_u32 v[11:12], s[12:13], v1, v8, v[2:3]
-; GFX9-NEXT: v_addc_co_u32_e64 v3, s[12:13], v4, v21, s[12:13]
-; GFX9-NEXT: v_addc_co_u32_e64 v4, s[12:13], v28, v22, s[12:13]
-; GFX9-NEXT: v_addc_co_u32_e64 v5, s[12:13], v5, v13, s[12:13]
-; GFX9-NEXT: v_addc_co_u32_e64 v6, s[12:13], v23, v14, s[12:13]
-; GFX9-NEXT: v_addc_co_u32_e64 v0, s[12:13], v27, v0, s[12:13]
-; GFX9-NEXT: v_addc_co_u32_e64 v0, s[12:13], v0, v10, s[14:15]
-; GFX9-NEXT: v_addc_co_u32_e64 v0, s[10:11], v0, v30, s[10:11]
-; GFX9-NEXT: v_addc_co_u32_e64 v0, s[8:9], v0, v29, s[8:9]
-; GFX9-NEXT: v_addc_co_u32_e64 v0, s[6:7], v0, v26, s[6:7]
-; GFX9-NEXT: v_addc_co_u32_e64 v0, s[4:5], v0, v25, s[4:5]
-; GFX9-NEXT: v_addc_co_u32_e32 v0, vcc, v0, v24, vcc
+; GFX9-NEXT: v_mad_u64_u32 v[11:12], vcc, v1, v8, v[2:3]
+; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v6, v19, vcc
+; GFX9-NEXT: v_mul_lo_u32 v10, v1, v14
+; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v4, v20, vcc
+; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, v5, v21, vcc
+; GFX9-NEXT: v_addc_co_u32_e32 v6, vcc, v24, v22, vcc
+; GFX9-NEXT: v_addc_co_u32_e32 v0, vcc, v23, v0, vcc
+; GFX9-NEXT: v_addc_co_u32_e64 v0, vcc, v0, v10, s[14:15]
+; GFX9-NEXT: v_addc_co_u32_e64 v0, vcc, v0, v30, s[12:13]
+; GFX9-NEXT: v_addc_co_u32_e64 v0, vcc, v0, v29, s[10:11]
+; GFX9-NEXT: v_addc_co_u32_e64 v0, vcc, v0, v28, s[8:9]
+; GFX9-NEXT: v_addc_co_u32_e64 v0, vcc, v0, v27, s[6:7]
+; GFX9-NEXT: v_addc_co_u32_e64 v0, vcc, v0, v25, s[4:5]
; GFX9-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v7, v8, v[0:1]
; GFX9-NEXT: v_mov_b32_e32 v0, v16
; GFX9-NEXT: v_mov_b32_e32 v1, v11
@@ -2621,66 +2618,68 @@ define i256 @v_mul_i256(i256 %num, i256 %den) {
; GFX10-NEXT: v_mov_b32_e32 v17, v1
; GFX10-NEXT: v_mov_b32_e32 v18, v2
; GFX10-NEXT: v_mov_b32_e32 v19, v3
-; GFX10-NEXT: v_mul_lo_u32 v27, v6, v9
-; GFX10-NEXT: v_mad_u64_u32 v[0:1], s4, v16, v14, 0
-; GFX10-NEXT: v_mul_lo_u32 v30, v4, v11
-; GFX10-NEXT: v_mul_lo_u32 v28, v5, v10
-; GFX10-NEXT: v_mad_u64_u32 v[2:3], s4, v17, v13, v[0:1]
-; GFX10-NEXT: v_mad_u64_u32 v[0:1], s4, v18, v12, v[2:3]
-; GFX10-NEXT: v_mad_u64_u32 v[2:3], s4, v16, v12, 0
-; GFX10-NEXT: v_mad_u64_u32 v[20:21], s4, v19, v11, v[0:1]
-; GFX10-NEXT: v_mad_u64_u32 v[0:1], s4, v17, v11, v[2:3]
-; GFX10-NEXT: v_cndmask_b32_e64 v22, 0, 1, s4
-; GFX10-NEXT: v_mad_u64_u32 v[2:3], s5, v4, v10, v[20:21]
-; GFX10-NEXT: v_mad_u64_u32 v[20:21], vcc_lo, v18, v10, v[0:1]
-; GFX10-NEXT: v_add_co_ci_u32_e32 v22, vcc_lo, 0, v22, vcc_lo
-; GFX10-NEXT: v_mad_u64_u32 v[0:1], s4, v5, v9, v[2:3]
-; GFX10-NEXT: v_mad_u64_u32 v[2:3], s4, v16, v10, 0
-; GFX10-NEXT: v_mad_u64_u32 v[23:24], vcc_lo, v19, v9, v[20:21]
-; GFX10-NEXT: v_mad_u64_u32 v[25:26], s4, v6, v8, v[0:1]
-; GFX10-NEXT: v_mad_u64_u32 v[0:1], s4, v17, v9, v[2:3]
-; GFX10-NEXT: v_add_co_ci_u32_e32 v6, vcc_lo, 0, v22, vcc_lo
-; GFX10-NEXT: v_mad_u64_u32 v[21:22], vcc_lo, v4, v8, v[23:24]
-; GFX10-NEXT: v_mov_b32_e32 v23, v25
-; GFX10-NEXT: v_cndmask_b32_e64 v20, 0, 1, s4
+; GFX10-NEXT: v_mov_b32_e32 v20, v4
+; GFX10-NEXT: v_mad_u64_u32 v[1:2], s4, v16, v14, 0
+; GFX10-NEXT: v_mov_b32_e32 v21, v5
+; GFX10-NEXT: v_mov_b32_e32 v0, v6
+; GFX10-NEXT: v_mov_b32_e32 v22, v7
+; GFX10-NEXT: v_mul_lo_u32 v31, v17, v14
+; GFX10-NEXT: v_mul_lo_u32 v29, v20, v11
+; GFX10-NEXT: v_mul_lo_u32 v30, v16, v15
+; GFX10-NEXT: v_mad_u64_u32 v[3:4], s4, v17, v13, v[1:2]
+; GFX10-NEXT: v_mad_u64_u32 v[1:2], s4, v16, v12, 0
+; GFX10-NEXT: v_mul_lo_u32 v27, v0, v9
+; GFX10-NEXT: v_mad_u64_u32 v[5:6], s4, v18, v12, v[3:4]
+; GFX10-NEXT: v_mad_u64_u32 v[3:4], s4, v17, v11, v[1:2]
+; GFX10-NEXT: v_cndmask_b32_e64 v7, 0, 1, s4
+; GFX10-NEXT: v_mad_u64_u32 v[25:26], s4, v16, v10, 0
+; GFX10-NEXT: v_mad_u64_u32 v[1:2], s5, v19, v11, v[5:6]
+; GFX10-NEXT: v_mad_u64_u32 v[5:6], vcc_lo, v18, v10, v[3:4]
+; GFX10-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, 0, v7, vcc_lo
+; GFX10-NEXT: v_mad_u64_u32 v[3:4], s4, v20, v10, v[1:2]
+; GFX10-NEXT: v_mad_u64_u32 v[1:2], vcc_lo, v19, v9, v[5:6]
+; GFX10-NEXT: v_mad_u64_u32 v[23:24], s4, v21, v9, v[3:4]
+; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v7, vcc_lo
+; GFX10-NEXT: v_mad_u64_u32 v[4:5], vcc_lo, v20, v8, v[1:2]
+; GFX10-NEXT: v_mad_u64_u32 v[6:7], s4, v0, v8, v[23:24]
+; GFX10-NEXT: v_mad_u64_u32 v[0:1], s4, v17, v9, v[25:26]
+; GFX10-NEXT: v_add_co_ci_u32_e32 v25, vcc_lo, 0, v3, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v28, 0, 1, s4
+; GFX10-NEXT: v_mul_lo_u32 v26, v21, v10
+; GFX10-NEXT: v_mad_u64_u32 v[23:24], vcc_lo, v16, v13, v[5:6]
; GFX10-NEXT: v_mad_u64_u32 v[2:3], s4, v18, v8, v[0:1]
-; GFX10-NEXT: v_add_co_ci_u32_e32 v6, vcc_lo, 0, v6, vcc_lo
-; GFX10-NEXT: v_mad_u64_u32 v[0:1], vcc_lo, v16, v13, v[22:23]
-; GFX10-NEXT: v_add_co_ci_u32_e64 v29, s4, 0, v20, s4
-; GFX10-NEXT: v_mov_b32_e32 v20, v3
-; GFX10-NEXT: v_mad_u64_u32 v[22:23], s4, v17, v12, v[0:1]
-; GFX10-NEXT: v_mad_u64_u32 v[24:25], s6, v16, v11, v[20:21]
-; GFX10-NEXT: v_mad_u64_u32 v[0:1], s5, v16, v8, 0
-; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, 1, s6
-; GFX10-NEXT: v_mad_u64_u32 v[20:21], s5, v18, v11, v[22:23]
-; GFX10-NEXT: v_mul_lo_u32 v22, v16, v15
-; GFX10-NEXT: v_mul_lo_u32 v23, v17, v14
-; GFX10-NEXT: v_mad_u64_u32 v[14:15], s6, v17, v10, v[24:25]
-; GFX10-NEXT: v_mul_lo_u32 v24, v19, v12
-; GFX10-NEXT: v_add_co_ci_u32_e64 v3, s6, 0, v3, s6
-; GFX10-NEXT: v_mad_u64_u32 v[11:12], s7, v19, v10, v[20:21]
-; GFX10-NEXT: v_mul_lo_u32 v25, v18, v13
-; GFX10-NEXT: v_mad_u64_u32 v[20:21], s6, v18, v9, v[14:15]
-; GFX10-NEXT: v_add_co_ci_u32_e64 v15, s6, 0, v3, s6
-; GFX10-NEXT: v_mad_u64_u32 v[13:14], s6, v4, v9, v[11:12]
-; GFX10-NEXT: v_mad_u64_u32 v[3:4], s8, v16, v9, v[1:2]
-; GFX10-NEXT: v_cndmask_b32_e64 v16, 0, 1, s8
-; GFX10-NEXT: v_mad_u64_u32 v[9:10], s8, v19, v8, v[20:21]
-; GFX10-NEXT: v_add_co_ci_u32_e64 v15, s8, 0, v15, s8
-; GFX10-NEXT: v_mad_u64_u32 v[11:12], s8, v5, v8, v[13:14]
-; GFX10-NEXT: v_mad_u64_u32 v[1:2], s9, v17, v8, v[3:4]
-; GFX10-NEXT: v_add_co_ci_u32_e64 v3, s9, v16, v9, s9
-; GFX10-NEXT: v_add_co_ci_u32_e64 v4, s9, v29, v10, s9
-; GFX10-NEXT: v_add_co_ci_u32_e64 v5, s9, v15, v11, s9
-; GFX10-NEXT: v_add_co_ci_u32_e64 v6, s9, v6, v12, s9
-; GFX10-NEXT: v_add_co_ci_u32_e64 v9, s9, v26, v22, s9
-; GFX10-NEXT: v_add_co_ci_u32_e64 v9, s8, v9, v23, s8
-; GFX10-NEXT: v_add_co_ci_u32_e64 v9, s6, v9, v25, s6
-; GFX10-NEXT: v_add_co_ci_u32_e64 v9, s6, v9, v24, s7
-; GFX10-NEXT: v_add_co_ci_u32_e64 v9, s5, v9, v30, s5
-; GFX10-NEXT: v_add_co_ci_u32_e64 v9, s4, v9, v28, s4
-; GFX10-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, v9, v27, vcc_lo
-; GFX10-NEXT: v_mad_u64_u32 v[7:8], s4, v7, v8, v[9:10]
+; GFX10-NEXT: v_mad_u64_u32 v[0:1], s6, v16, v8, 0
+; GFX10-NEXT: v_add_co_ci_u32_e64 v28, s4, 0, v28, s4
+; GFX10-NEXT: v_mad_u64_u32 v[5:6], s5, v17, v12, v[23:24]
+; GFX10-NEXT: v_mad_u64_u32 v[23:24], s6, v16, v11, v[3:4]
+; GFX10-NEXT: v_cndmask_b32_e64 v14, 0, 1, s6
+; GFX10-NEXT: v_mad_u64_u32 v[3:4], s4, v18, v11, v[5:6]
+; GFX10-NEXT: v_mad_u64_u32 v[5:6], s6, v17, v10, v[23:24]
+; GFX10-NEXT: v_mul_lo_u32 v23, v19, v12
+; GFX10-NEXT: v_mul_lo_u32 v24, v18, v13
+; GFX10-NEXT: v_mad_u64_u32 v[11:12], s7, v19, v10, v[3:4]
+; GFX10-NEXT: v_add_co_ci_u32_e64 v10, s6, 0, v14, s6
+; GFX10-NEXT: v_mad_u64_u32 v[3:4], s6, v18, v9, v[5:6]
+; GFX10-NEXT: v_add_co_ci_u32_e64 v14, s6, 0, v10, s6
+; GFX10-NEXT: v_mad_u64_u32 v[5:6], s6, v20, v9, v[11:12]
+; GFX10-NEXT: v_mad_u64_u32 v[10:11], s8, v16, v9, v[1:2]
+; GFX10-NEXT: v_cndmask_b32_e64 v9, 0, 1, s8
+; GFX10-NEXT: v_mad_u64_u32 v[12:13], s8, v19, v8, v[3:4]
+; GFX10-NEXT: v_add_co_ci_u32_e64 v16, s8, 0, v14, s8
+; GFX10-NEXT: v_mad_u64_u32 v[14:15], s8, v21, v8, v[5:6]
+; GFX10-NEXT: v_mad_u64_u32 v[1:2], s9, v17, v8, v[10:11]
+; GFX10-NEXT: v_add_co_ci_u32_e64 v3, s9, v9, v12, s9
+; GFX10-NEXT: v_add_co_ci_u32_e64 v4, s9, v28, v13, s9
+; GFX10-NEXT: v_add_co_ci_u32_e64 v5, s9, v16, v14, s9
+; GFX10-NEXT: v_add_co_ci_u32_e64 v6, s9, v25, v15, s9
+; GFX10-NEXT: v_add_co_ci_u32_e64 v7, s9, v7, v30, s9
+; GFX10-NEXT: v_add_co_ci_u32_e64 v7, s8, v7, v31, s8
+; GFX10-NEXT: v_add_co_ci_u32_e64 v7, s6, v7, v24, s6
+; GFX10-NEXT: v_add_co_ci_u32_e64 v7, s6, v7, v23, s7
+; GFX10-NEXT: v_add_co_ci_u32_e64 v7, s4, v7, v29, s4
+; GFX10-NEXT: v_add_co_ci_u32_e64 v7, s4, v7, v26, s5
+; GFX10-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, v7, v27, vcc_lo
+; GFX10-NEXT: v_mad_u64_u32 v[7:8], s4, v22, v8, v[7:8]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_mul_i256:
@@ -2688,67 +2687,67 @@ define i256 @v_mul_i256(i256 %num, i256 %den) {
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v16, v0 :: v_dual_mov_b32 v17, v1
; GFX11-NEXT: v_dual_mov_b32 v18, v2 :: v_dual_mov_b32 v19, v3
-; GFX11-NEXT: v_dual_mov_b32 v20, v8 :: v_dual_mov_b32 v21, v7
-; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v16, v14, 0
-; GFX11-NEXT: v_mul_lo_u32 v27, v6, v9
-; GFX11-NEXT: v_mul_lo_u32 v30, v4, v11
-; GFX11-NEXT: v_mul_lo_u32 v31, v17, v14
-; GFX11-NEXT: v_mul_lo_u32 v28, v5, v10
+; GFX11-NEXT: v_dual_mov_b32 v20, v4 :: v_dual_mov_b32 v21, v5
+; GFX11-NEXT: v_mad_u64_u32 v[1:2], null, v16, v14, 0
+; GFX11-NEXT: v_dual_mov_b32 v0, v6 :: v_dual_mov_b32 v23, v7
+; GFX11-NEXT: v_mov_b32_e32 v22, v8
+; GFX11-NEXT: v_mad_u64_u32 v[26:27], null, v16, v10, 0
+; GFX11-NEXT: v_mul_lo_u32 v28, v0, v9
+; GFX11-NEXT: v_mad_u64_u32 v[3:4], null, v17, v13, v[1:2]
+; GFX11-NEXT: v_mad_u64_u32 v[1:2], null, v16, v12, 0
+; GFX11-NEXT: v_mul_lo_u32 v30, v20, v11
; GFX11-NEXT: v_mul_lo_u32 v15, v16, v15
-; GFX11-NEXT: v_mad_u64_u32 v[2:3], null, v17, v13, v[0:1]
-; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v18, v12, v[2:3]
-; GFX11-NEXT: v_mad_u64_u32 v[2:3], null, v16, v12, 0
-; GFX11-NEXT: v_mad_u64_u32 v[7:8], null, v19, v11, v[0:1]
-; GFX11-NEXT: v_mad_u64_u32 v[0:1], s0, v17, v11, v[2:3]
-; GFX11-NEXT: v_cndmask_b32_e64 v22, 0, 1, s0
-; GFX11-NEXT: v_mad_u64_u32 v[2:3], null, v4, v10, v[7:8]
-; GFX11-NEXT: v_mad_u64_u32 v[7:8], vcc_lo, v18, v10, v[0:1]
-; GFX11-NEXT: v_add_co_ci_u32_e64 v22, null, 0, v22, vcc_lo
-; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v5, v9, v[2:3]
-; GFX11-NEXT: v_mad_u64_u32 v[2:3], null, v16, v10, 0
-; GFX11-NEXT: v_mad_u64_u32 v[25:26], vcc_lo, v19, v9, v[7:8]
-; GFX11-NEXT: v_mad_u64_u32 v[7:8], null, v6, v20, v[0:1]
-; GFX11-NEXT: v_mad_u64_u32 v[0:1], s0, v17, v9, v[2:3]
-; GFX11-NEXT: v_add_co_ci_u32_e64 v6, null, 0, v22, vcc_lo
-; GFX11-NEXT: v_mad_u64_u32 v[23:24], vcc_lo, v4, v20, v[25:26]
-; GFX11-NEXT: v_mov_b32_e32 v25, v7
-; GFX11-NEXT: v_cndmask_b32_e64 v22, 0, 1, s0
-; GFX11-NEXT: v_mad_u64_u32 v[2:3], s0, v18, v20, v[0:1]
-; GFX11-NEXT: v_add_co_ci_u32_e64 v26, null, 0, v6, vcc_lo
-; GFX11-NEXT: v_mad_u64_u32 v[0:1], vcc_lo, v16, v13, v[24:25]
-; GFX11-NEXT: v_add_co_ci_u32_e64 v29, null, 0, v22, s0
-; GFX11-NEXT: v_mov_b32_e32 v22, v3
-; GFX11-NEXT: v_mad_u64_u32 v[6:7], s0, v17, v12, v[0:1]
-; GFX11-NEXT: v_mad_u64_u32 v[24:25], s2, v16, v11, v[22:23]
-; GFX11-NEXT: v_cndmask_b32_e64 v3, 0, 1, s2
-; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v16, v20, 0
-; GFX11-NEXT: v_mad_u64_u32 v[22:23], s1, v18, v11, v[6:7]
-; GFX11-NEXT: v_mad_u64_u32 v[6:7], s2, v17, v10, v[24:25]
+; GFX11-NEXT: v_mul_lo_u32 v14, v17, v14
+; GFX11-NEXT: v_mad_u64_u32 v[5:6], null, v18, v12, v[3:4]
+; GFX11-NEXT: v_mad_u64_u32 v[3:4], s0, v17, v11, v[1:2]
+; GFX11-NEXT: v_cndmask_b32_e64 v7, 0, 1, s0
+; GFX11-NEXT: v_mad_u64_u32 v[1:2], null, v19, v11, v[5:6]
+; GFX11-NEXT: v_mad_u64_u32 v[5:6], vcc_lo, v18, v10, v[3:4]
+; GFX11-NEXT: v_add_co_ci_u32_e64 v7, null, 0, v7, vcc_lo
+; GFX11-NEXT: v_mad_u64_u32 v[3:4], null, v20, v10, v[1:2]
+; GFX11-NEXT: v_mad_u64_u32 v[1:2], vcc_lo, v19, v9, v[5:6]
+; GFX11-NEXT: v_mad_u64_u32 v[24:25], null, v21, v9, v[3:4]
+; GFX11-NEXT: v_add_co_ci_u32_e64 v3, null, 0, v7, vcc_lo
+; GFX11-NEXT: v_mad_u64_u32 v[4:5], vcc_lo, v20, v22, v[1:2]
+; GFX11-NEXT: v_mad_u64_u32 v[6:7], null, v0, v22, v[24:25]
+; GFX11-NEXT: v_mad_u64_u32 v[0:1], s0, v17, v9, v[26:27]
+; GFX11-NEXT: v_add_co_ci_u32_e64 v26, null, 0, v3, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v8, 0, 1, s0
+; GFX11-NEXT: v_mul_lo_u32 v27, v21, v10
+; GFX11-NEXT: v_mad_u64_u32 v[24:25], vcc_lo, v16, v13, v[5:6]
+; GFX11-NEXT: v_mad_u64_u32 v[2:3], s0, v18, v22, v[0:1]
+; GFX11-NEXT: v_add_co_ci_u32_e64 v29, null, 0, v8, s0
+; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v16, v22, 0
+; GFX11-NEXT: v_mad_u64_u32 v[5:6], s1, v17, v12, v[24:25]
+; GFX11-NEXT: v_mad_u64_u32 v[24:25], s2, v16, v11, v[3:4]
+; GFX11-NEXT: v_cndmask_b32_e64 v8, 0, 1, s2
+; GFX11-NEXT: v_mad_u64_u32 v[3:4], s0, v18, v11, v[5:6]
+; GFX11-NEXT: v_mad_u64_u32 v[5:6], s2, v17, v10, v[24:25]
; GFX11-NEXT: v_mul_lo_u32 v24, v19, v12
-; GFX11-NEXT: v_add_co_ci_u32_e64 v3, null, 0, v3, s2
-; GFX11-NEXT: v_mad_u64_u32 v[11:12], s3, v19, v10, v[22:23]
-; GFX11-NEXT: v_mul_lo_u32 v22, v18, v13
-; GFX11-NEXT: v_mad_u64_u32 v[13:14], s2, v18, v9, v[6:7]
-; GFX11-NEXT: v_add_co_ci_u32_e64 v18, null, 0, v3, s2
-; GFX11-NEXT: v_mad_u64_u32 v[6:7], s2, v4, v9, v[11:12]
-; GFX11-NEXT: v_mad_u64_u32 v[3:4], s4, v16, v9, v[1:2]
+; GFX11-NEXT: v_add_co_ci_u32_e64 v8, null, 0, v8, s2
+; GFX11-NEXT: v_mul_lo_u32 v25, v18, v13
+; GFX11-NEXT: v_mad_u64_u32 v[11:12], s3, v19, v10, v[3:4]
+; GFX11-NEXT: v_mad_u64_u32 v[3:4], s2, v18, v9, v[5:6]
+; GFX11-NEXT: v_add_co_ci_u32_e64 v13, null, 0, v8, s2
+; GFX11-NEXT: v_mad_u64_u32 v[5:6], s2, v20, v9, v[11:12]
+; GFX11-NEXT: v_mad_u64_u32 v[10:11], s4, v16, v9, v[1:2]
; GFX11-NEXT: v_cndmask_b32_e64 v16, 0, 1, s4
-; GFX11-NEXT: v_mad_u64_u32 v[9:10], s4, v19, v20, v[13:14]
-; GFX11-NEXT: v_add_co_ci_u32_e64 v13, null, 0, v18, s4
-; GFX11-NEXT: v_mad_u64_u32 v[11:12], s4, v5, v20, v[6:7]
-; GFX11-NEXT: v_mad_u64_u32 v[1:2], s5, v17, v20, v[3:4]
-; GFX11-NEXT: v_add_co_ci_u32_e64 v3, s5, v16, v9, s5
-; GFX11-NEXT: v_add_co_ci_u32_e64 v4, s5, v29, v10, s5
-; GFX11-NEXT: v_add_co_ci_u32_e64 v5, s5, v13, v11, s5
-; GFX11-NEXT: v_add_co_ci_u32_e64 v6, s5, v26, v12, s5
-; GFX11-NEXT: v_add_co_ci_u32_e64 v7, null, v8, v15, s5
-; GFX11-NEXT: v_add_co_ci_u32_e64 v7, null, v7, v31, s4
-; GFX11-NEXT: v_add_co_ci_u32_e64 v7, null, v7, v22, s2
+; GFX11-NEXT: v_mad_u64_u32 v[8:9], s4, v19, v22, v[3:4]
+; GFX11-NEXT: v_add_co_ci_u32_e64 v18, null, 0, v13, s4
+; GFX11-NEXT: v_mad_u64_u32 v[12:13], s4, v21, v22, v[5:6]
+; GFX11-NEXT: v_mad_u64_u32 v[1:2], s5, v17, v22, v[10:11]
+; GFX11-NEXT: v_add_co_ci_u32_e64 v3, s5, v16, v8, s5
+; GFX11-NEXT: v_add_co_ci_u32_e64 v4, s5, v29, v9, s5
+; GFX11-NEXT: v_add_co_ci_u32_e64 v5, s5, v18, v12, s5
+; GFX11-NEXT: v_add_co_ci_u32_e64 v6, s5, v26, v13, s5
+; GFX11-NEXT: v_add_co_ci_u32_e64 v7, null, v7, v15, s5
+; GFX11-NEXT: v_add_co_ci_u32_e64 v7, null, v7, v14, s4
+; GFX11-NEXT: v_add_co_ci_u32_e64 v7, null, v7, v25, s2
; GFX11-NEXT: v_add_co_ci_u32_e64 v7, null, v7, v24, s3
-; GFX11-NEXT: v_add_co_ci_u32_e64 v7, null, v7, v30, s1
-; GFX11-NEXT: v_add_co_ci_u32_e64 v7, null, v7, v28, s0
-; GFX11-NEXT: v_add_co_ci_u32_e64 v9, null, v7, v27, vcc_lo
-; GFX11-NEXT: v_mad_u64_u32 v[7:8], null, v21, v20, v[9:10]
+; GFX11-NEXT: v_add_co_ci_u32_e64 v7, null, v7, v30, s0
+; GFX11-NEXT: v_add_co_ci_u32_e64 v7, null, v7, v27, s1
+; GFX11-NEXT: v_add_co_ci_u32_e64 v9, null, v7, v28, vcc_lo
+; GFX11-NEXT: v_mad_u64_u32 v[7:8], null, v23, v22, v[9:10]
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: v_mul_i256:
@@ -2760,99 +2759,99 @@ define i256 @v_mul_i256(i256 %num, i256 %den) {
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_dual_mov_b32 v16, v0 :: v_dual_mov_b32 v17, v1
; GFX12-NEXT: v_dual_mov_b32 v18, v2 :: v_dual_mov_b32 v19, v3
-; GFX12-NEXT: v_mul_lo_u32 v27, v6, v9
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX12-NEXT: v_mad_co_u64_u32 v[0:1], null, v16, v14, 0
-; GFX12-NEXT: v_mul_lo_u32 v30, v4, v11
-; GFX12-NEXT: v_mul_lo_u32 v28, v5, v10
-; GFX12-NEXT: v_mad_co_u64_u32 v[2:3], null, v17, v13, v[0:1]
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX12-NEXT: v_mad_co_u64_u32 v[0:1], null, v18, v12, v[2:3]
-; GFX12-NEXT: v_mad_co_u64_u32 v[2:3], null, v16, v12, 0
-; GFX12-NEXT: v_mad_co_u64_u32 v[20:21], null, v19, v11, v[0:1]
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX12-NEXT: v_mad_co_u64_u32 v[0:1], s0, v17, v11, v[2:3]
+; GFX12-NEXT: v_dual_mov_b32 v20, v4 :: v_dual_mov_b32 v21, v5
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_4) | instid1(VALU_DEP_4)
+; GFX12-NEXT: v_mad_co_u64_u32 v[1:2], null, v16, v14, 0
+; GFX12-NEXT: v_mov_b32_e32 v0, v6
+; GFX12-NEXT: v_mov_b32_e32 v22, v7
+; GFX12-NEXT: v_mad_co_u64_u32 v[25:26], null, v16, v10, 0
+; GFX12-NEXT: v_mul_lo_u32 v31, v17, v14
+; GFX12-NEXT: v_mul_lo_u32 v27, v0, v9
+; GFX12-NEXT: v_mad_co_u64_u32 v[3:4], null, v17, v13, v[1:2]
+; GFX12-NEXT: v_mad_co_u64_u32 v[1:2], null, v16, v12, 0
+; GFX12-NEXT: v_mul_lo_u32 v29, v20, v11
+; GFX12-NEXT: v_mul_lo_u32 v30, v16, v15
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX12-NEXT: v_mad_co_u64_u32 v[5:6], null, v18, v12, v[3:4]
+; GFX12-NEXT: v_mad_co_u64_u32 v[3:4], s0, v17, v11, v[1:2]
; GFX12-NEXT: s_wait_alu 0xf1ff
-; GFX12-NEXT: v_cndmask_b32_e64 v22, 0, 1, s0
-; GFX12-NEXT: v_mad_co_u64_u32 v[2:3], null, v4, v10, v[20:21]
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX12-NEXT: v_mad_co_u64_u32 v[20:21], vcc_lo, v18, v10, v[0:1]
+; GFX12-NEXT: v_cndmask_b32_e64 v7, 0, 1, s0
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX12-NEXT: v_mad_co_u64_u32 v[1:2], null, v19, v11, v[5:6]
+; GFX12-NEXT: v_mad_co_u64_u32 v[5:6], vcc_lo, v18, v10, v[3:4]
; GFX12-NEXT: s_wait_alu 0xfffd
-; GFX12-NEXT: v_add_co_ci_u32_e64 v22, null, 0, v22, vcc_lo
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX12-NEXT: v_mad_co_u64_u32 v[0:1], null, v5, v9, v[2:3]
-; GFX12-NEXT: v_mad_co_u64_u32 v[2:3], null, v16, v10, 0
-; GFX12-NEXT: v_mad_co_u64_u32 v[23:24], vcc_lo, v19, v9, v[20:21]
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX12-NEXT: v_mad_co_u64_u32 v[25:26], null, v6, v8, v[0:1]
-; GFX12-NEXT: v_mad_co_u64_u32 v[0:1], s0, v17, v9, v[2:3]
+; GFX12-NEXT: v_add_co_ci_u32_e64 v7, null, 0, v7, vcc_lo
+; GFX12-NEXT: v_mad_co_u64_u32 v[3:4], null, v20, v10, v[1:2]
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX12-NEXT: v_mad_co_u64_u32 v[1:2], vcc_lo, v19, v9, v[5:6]
+; GFX12-NEXT: v_mad_co_u64_u32 v[23:24], null, v21, v9, v[3:4]
; GFX12-NEXT: s_wait_alu 0xfffd
-; GFX12-NEXT: v_add_co_ci_u32_e64 v6, null, 0, v22, vcc_lo
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_4)
-; GFX12-NEXT: v_mad_co_u64_u32 v[21:22], vcc_lo, v4, v8, v[23:24]
-; GFX12-NEXT: s_wait_alu 0xf1ff
-; GFX12-NEXT: v_cndmask_b32_e64 v20, 0, 1, s0
+; GFX12-NEXT: v_add_co_ci_u32_e64 v3, null, 0, v7, vcc_lo
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX12-NEXT: v_mad_co_u64_u32 v[4:5], vcc_lo, v20, v8, v[1:2]
+; GFX12-NEXT: v_mad_co_u64_u32 v[6:7], null, v0, v8, v[23:24]
+; GFX12-NEXT: v_mad_co_u64_u32 v[0:1], s0, v17, v9, v[25:26]
; GFX12-NEXT: s_wait_alu 0xfffd
-; GFX12-NEXT: v_add_co_ci_u32_e64 v6, null, 0, v6, vcc_lo
-; GFX12-NEXT: v_mad_co_u64_u32 v[2:3], s0, v18, v8, v[0:1]
-; GFX12-NEXT: v_mov_b32_e32 v23, v25
+; GFX12-NEXT: v_add_co_ci_u32_e64 v25, null, 0, v3, vcc_lo
; GFX12-NEXT: s_wait_alu 0xf1ff
-; GFX12-NEXT: v_add_co_ci_u32_e64 v29, null, 0, v20, s0
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX12-NEXT: v_mov_b32_e32 v20, v3
-; GFX12-NEXT: v_mad_co_u64_u32 v[0:1], vcc_lo, v16, v13, v[22:23]
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX12-NEXT: v_mad_co_u64_u32 v[24:25], s2, v16, v11, v[20:21]
+; GFX12-NEXT: v_cndmask_b32_e64 v28, 0, 1, s0
+; GFX12-NEXT: v_mul_lo_u32 v26, v21, v10
+; GFX12-NEXT: v_mad_co_u64_u32 v[23:24], vcc_lo, v16, v13, v[5:6]
+; GFX12-NEXT: v_mad_co_u64_u32 v[2:3], s0, v18, v8, v[0:1]
; GFX12-NEXT: s_wait_alu 0xf1ff
-; GFX12-NEXT: v_cndmask_b32_e64 v3, 0, 1, s2
-; GFX12-NEXT: v_mad_co_u64_u32 v[22:23], s0, v17, v12, v[0:1]
+; GFX12-NEXT: v_add_co_ci_u32_e64 v28, null, 0, v28, s0
; GFX12-NEXT: v_mad_co_u64_u32 v[0:1], null, v16, v8, 0
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX12-NEXT: v_mad_co_u64_u32 v[20:21], s1, v18, v11, v[22:23]
-; GFX12-NEXT: v_mul_lo_u32 v22, v16, v15
-; GFX12-NEXT: v_mul_lo_u32 v23, v17, v14
-; GFX12-NEXT: v_mad_co_u64_u32 v[14:15], s2, v17, v10, v[24:25]
-; GFX12-NEXT: v_mul_lo_u32 v24, v19, v12
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX12-NEXT: v_mad_co_u64_u32 v[5:6], s1, v17, v12, v[23:24]
+; GFX12-NEXT: v_mad_co_u64_u32 v[23:24], s2, v16, v11, v[3:4]
+; GFX12-NEXT: s_wait_alu 0xf1ff
+; GFX12-NEXT: v_cndmask_b32_e64 v14, 0, 1, s2
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX12-NEXT: v_mad_co_u64_u32 v[3:4], s0, v18, v11, v[5:6]
+; GFX12-NEXT: v_mad_co_u64_u32 v[5:6], s2, v17, v10, v[23:24]
+; GFX12-NEXT: v_mul_lo_u32 v23, v19, v12
+; GFX12-NEXT: v_mul_lo_u32 v24, v18, v13
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_4) | instid1(VALU_DEP_2)
+; GFX12-NEXT: v_mad_co_u64_u32 v[11:12], s3, v19, v10, v[3:4]
; GFX12-NEXT: s_wait_alu 0xf1ff
-; GFX12-NEXT: v_add_co_ci_u32_e64 v3, null, 0, v3, s2
-; GFX12-NEXT: v_mad_co_u64_u32 v[11:12], s3, v19, v10, v[20:21]
-; GFX12-NEXT: v_mul_lo_u32 v25, v18, v13
-; GFX12-NEXT: v_mad_co_u64_u32 v[20:21], s2, v18, v9, v[14:15]
+; GFX12-NEXT: v_add_co_ci_u32_e64 v10, null, 0, v14, s2
+; GFX12-NEXT: v_mad_co_u64_u32 v[3:4], s2, v18, v9, v[5:6]
; GFX12-NEXT: s_wait_alu 0xf1ff
-; GFX12-NEXT: v_add_co_ci_u32_e64 v15, null, 0, v3, s2
+; GFX12-NEXT: v_add_co_ci_u32_e64 v14, null, 0, v10, s2
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_4)
-; GFX12-NEXT: v_mad_co_u64_u32 v[13:14], s2, v4, v9, v[11:12]
-; GFX12-NEXT: v_mad_co_u64_u32 v[3:4], s4, v16, v9, v[1:2]
+; GFX12-NEXT: v_mad_co_u64_u32 v[5:6], s2, v20, v9, v[11:12]
+; GFX12-NEXT: v_mad_co_u64_u32 v[10:11], s4, v16, v9, v[1:2]
; GFX12-NEXT: s_wait_alu 0xf1ff
-; GFX12-NEXT: v_cndmask_b32_e64 v16, 0, 1, s4
-; GFX12-NEXT: v_mad_co_u64_u32 v[9:10], s4, v19, v8, v[20:21]
+; GFX12-NEXT: v_cndmask_b32_e64 v9, 0, 1, s4
+; GFX12-NEXT: v_mad_co_u64_u32 v[12:13], s4, v19, v8, v[3:4]
; GFX12-NEXT: s_wait_alu 0xf1ff
-; GFX12-NEXT: v_add_co_ci_u32_e64 v15, null, 0, v15, s4
-; GFX12-NEXT: v_mad_co_u64_u32 v[11:12], s4, v5, v8, v[13:14]
-; GFX12-NEXT: v_mad_co_u64_u32 v[1:2], s5, v17, v8, v[3:4]
+; GFX12-NEXT: v_add_co_ci_u32_e64 v16, null, 0, v14, s4
+; GFX12-NEXT: v_mad_co_u64_u32 v[14:15], s4, v21, v8, v[5:6]
+; GFX12-NEXT: v_mad_co_u64_u32 v[1:2], s5, v17, v8, v[10:11]
; GFX12-NEXT: s_wait_alu 0xf1ff
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_4)
-; GFX12-NEXT: v_add_co_ci_u32_e64 v3, s5, v16, v9, s5
+; GFX12-NEXT: v_add_co_ci_u32_e64 v3, s5, v9, v12, s5
; GFX12-NEXT: s_wait_alu 0xf1ff
-; GFX12-NEXT: v_add_co_ci_u32_e64 v4, s5, v29, v10, s5
+; GFX12-NEXT: v_add_co_ci_u32_e64 v4, s5, v28, v13, s5
; GFX12-NEXT: s_wait_alu 0xf1ff
-; GFX12-NEXT: v_add_co_ci_u32_e64 v5, s5, v15, v11, s5
+; GFX12-NEXT: v_add_co_ci_u32_e64 v5, s5, v16, v14, s5
; GFX12-NEXT: s_wait_alu 0xf1ff
-; GFX12-NEXT: v_add_co_ci_u32_e64 v6, s5, v6, v12, s5
+; GFX12-NEXT: v_add_co_ci_u32_e64 v6, s5, v25, v15, s5
; GFX12-NEXT: s_wait_alu 0xf1ff
-; GFX12-NEXT: v_add_co_ci_u32_e64 v9, null, v26, v22, s5
+; GFX12-NEXT: v_add_co_ci_u32_e64 v7, null, v7, v30, s5
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX12-NEXT: v_add_co_ci_u32_e64 v9, null, v9, v23, s4
-; GFX12-NEXT: v_add_co_ci_u32_e64 v9, null, v9, v25, s2
+; GFX12-NEXT: v_add_co_ci_u32_e64 v7, null, v7, v31, s4
+; GFX12-NEXT: v_add_co_ci_u32_e64 v7, null, v7, v24, s2
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX12-NEXT: v_add_co_ci_u32_e64 v9, null, v9, v24, s3
-; GFX12-NEXT: v_add_co_ci_u32_e64 v9, null, v9, v30, s1
+; GFX12-NEXT: v_add_co_ci_u32_e64 v7, null, v7, v23, s3
+; GFX12-NEXT: v_add_co_ci_u32_e64 v7, null, v7, v29, s0
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX12-NEXT: v_add_co_ci_u32_e64 v9, null, v9, v28, s0
+; GFX12-NEXT: v_add_co_ci_u32_e64 v7, null, v7, v26, s1
; GFX12-NEXT: s_wait_alu 0xfffd
-; GFX12-NEXT: v_add_co_ci_u32_e64 v9, null, v9, v27, vcc_lo
+; GFX12-NEXT: v_add_co_ci_u32_e64 v7, null, v7, v27, vcc_lo
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT: v_mad_co_u64_u32 v[7:8], null, v7, v8, v[9:10]
+; GFX12-NEXT: v_mad_co_u64_u32 v[7:8], null, v22, v8, v[7:8]
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX1250-LABEL: v_mul_i256:
diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.128bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.128bit.ll
index 231460f584a2e..a498525c92360 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.128bit.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.128bit.ll
@@ -2853,52 +2853,50 @@ define inreg <4 x i32> @bitcast_v8bf16_to_v4i32_scalar(<8 x bfloat> inreg %a, i3
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: s_cmp_lg_u32 s24, 0
-; SI-NEXT: v_mul_f32_e64 v15, 1.0, s17
-; SI-NEXT: v_mul_f32_e64 v10, 1.0, s16
-; SI-NEXT: v_mul_f32_e64 v14, 1.0, s19
-; SI-NEXT: v_mul_f32_e64 v8, 1.0, s18
-; SI-NEXT: v_mul_f32_e64 v13, 1.0, s21
-; SI-NEXT: v_mul_f32_e64 v6, 1.0, s20
-; SI-NEXT: v_mul_f32_e64 v12, 1.0, s23
-; SI-NEXT: v_mul_f32_e64 v4, 1.0, s22
+; SI-NEXT: v_mul_f32_e64 v16, 1.0, s17
+; SI-NEXT: v_mul_f32_e64 v11, 1.0, s16
+; SI-NEXT: v_mul_f32_e64 v15, 1.0, s19
+; SI-NEXT: v_mul_f32_e64 v9, 1.0, s18
+; SI-NEXT: v_mul_f32_e64 v14, 1.0, s21
+; SI-NEXT: v_mul_f32_e64 v7, 1.0, s20
+; SI-NEXT: v_mul_f32_e64 v13, 1.0, s23
+; SI-NEXT: v_mul_f32_e64 v5, 1.0, s22
; SI-NEXT: s_cbranch_scc0 .LBB23_4
; SI-NEXT: ; %bb.1: ; %cmp.false
-; SI-NEXT: v_lshrrev_b32_e32 v11, 16, v15
-; SI-NEXT: v_lshr_b64 v[0:1], v[10:11], 16
-; SI-NEXT: v_lshrrev_b32_e32 v9, 16, v14
-; SI-NEXT: v_lshrrev_b32_e32 v5, 16, v12
-; SI-NEXT: v_lshr_b64 v[1:2], v[8:9], 16
-; SI-NEXT: v_lshrrev_b32_e32 v7, 16, v13
-; SI-NEXT: v_lshr_b64 v[16:17], v[4:5], 16
-; SI-NEXT: v_lshr_b64 v[2:3], v[6:7], 16
-; SI-NEXT: v_mov_b32_e32 v3, v16
+; SI-NEXT: v_lshrrev_b32_e32 v12, 16, v16
+; SI-NEXT: v_lshr_b64 v[0:1], v[11:12], 16
+; SI-NEXT: v_lshrrev_b32_e32 v10, 16, v15
+; SI-NEXT: v_lshr_b64 v[1:2], v[9:10], 16
+; SI-NEXT: v_lshrrev_b32_e32 v8, 16, v14
+; SI-NEXT: v_lshr_b64 v[2:3], v[7:8], 16
+; SI-NEXT: v_lshrrev_b32_e32 v6, 16, v13
+; SI-NEXT: v_lshr_b64 v[3:4], v[5:6], 16
; SI-NEXT: s_cbranch_execnz .LBB23_3
; SI-NEXT: .LBB23_2: ; %cmp.true
-; SI-NEXT: v_and_b32_e32 v1, 0xffff0000, v15
-; SI-NEXT: v_and_b32_e32 v0, 0xffff0000, v10
+; SI-NEXT: v_and_b32_e32 v1, 0xffff0000, v16
+; SI-NEXT: v_and_b32_e32 v0, 0xffff0000, v11
; SI-NEXT: v_add_f32_e32 v1, 0x40c00000, v1
; SI-NEXT: v_add_f32_e32 v0, 0x40c00000, v0
; SI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; SI-NEXT: v_lshr_b64 v[0:1], v[0:1], 16
-; SI-NEXT: v_and_b32_e32 v2, 0xffff0000, v14
-; SI-NEXT: v_and_b32_e32 v1, 0xffff0000, v8
+; SI-NEXT: v_and_b32_e32 v2, 0xffff0000, v15
+; SI-NEXT: v_and_b32_e32 v1, 0xffff0000, v9
; SI-NEXT: v_add_f32_e32 v2, 0x40c00000, v2
; SI-NEXT: v_add_f32_e32 v1, 0x40c00000, v1
; SI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
-; SI-NEXT: v_and_b32_e32 v5, 0xffff0000, v12
; SI-NEXT: v_lshr_b64 v[1:2], v[1:2], 16
-; SI-NEXT: v_and_b32_e32 v3, 0xffff0000, v13
-; SI-NEXT: v_and_b32_e32 v4, 0xffff0000, v4
-; SI-NEXT: v_add_f32_e32 v5, 0x40c00000, v5
-; SI-NEXT: v_and_b32_e32 v2, 0xffff0000, v6
+; SI-NEXT: v_and_b32_e32 v3, 0xffff0000, v14
+; SI-NEXT: v_and_b32_e32 v2, 0xffff0000, v7
; SI-NEXT: v_add_f32_e32 v3, 0x40c00000, v3
-; SI-NEXT: v_add_f32_e32 v4, 0x40c00000, v4
-; SI-NEXT: v_lshrrev_b32_e32 v5, 16, v5
; SI-NEXT: v_add_f32_e32 v2, 0x40c00000, v2
; SI-NEXT: v_lshrrev_b32_e32 v3, 16, v3
-; SI-NEXT: v_lshr_b64 v[4:5], v[4:5], 16
; SI-NEXT: v_lshr_b64 v[2:3], v[2:3], 16
-; SI-NEXT: v_mov_b32_e32 v3, v4
+; SI-NEXT: v_and_b32_e32 v4, 0xffff0000, v13
+; SI-NEXT: v_and_b32_e32 v3, 0xffff0000, v5
+; SI-NEXT: v_add_f32_e32 v4, 0x40c00000, v4
+; SI-NEXT: v_add_f32_e32 v3, 0x40c00000, v3
+; SI-NEXT: v_lshrrev_b32_e32 v4, 16, v4
+; SI-NEXT: v_lshr_b64 v[3:4], v[3:4], 16
; SI-NEXT: .LBB23_3: ; %end
; SI-NEXT: s_setpc_b64 s[30:31]
; SI-NEXT: .LBB23_4:
@@ -7396,52 +7394,50 @@ define inreg <4 x float> @bitcast_v8bf16_to_v4f32_scalar(<8 x bfloat> inreg %a,
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: s_cmp_lg_u32 s24, 0
-; SI-NEXT: v_mul_f32_e64 v15, 1.0, s17
-; SI-NEXT: v_mul_f32_e64 v10, 1.0, s16
-; SI-NEXT: v_mul_f32_e64 v14, 1.0, s19
-; SI-NEXT: v_mul_f32_e64 v8, 1.0, s18
-; SI-NEXT: v_mul_f32_e64 v13, 1.0, s21
-; SI-NEXT: v_mul_f32_e64 v6, 1.0, s20
-; SI-NEXT: v_mul_f32_e64 v12, 1.0, s23
-; SI-NEXT: v_mul_f32_e64 v4, 1.0, s22
+; SI-NEXT: v_mul_f32_e64 v16, 1.0, s17
+; SI-NEXT: v_mul_f32_e64 v11, 1.0, s16
+; SI-NEXT: v_mul_f32_e64 v15, 1.0, s19
+; SI-NEXT: v_mul_f32_e64 v9, 1.0, s18
+; SI-NEXT: v_mul_f32_e64 v14, 1.0, s21
+; SI-NEXT: v_mul_f32_e64 v7, 1.0, s20
+; SI-NEXT: v_mul_f32_e64 v13, 1.0, s23
+; SI-NEXT: v_mul_f32_e64 v5, 1.0, s22
; SI-NEXT: s_cbranch_scc0 .LBB47_4
; SI-NEXT: ; %bb.1: ; %cmp.false
-; SI-NEXT: v_lshrrev_b32_e32 v11, 16, v15
-; SI-NEXT: v_lshr_b64 v[0:1], v[10:11], 16
-; SI-NEXT: v_lshrrev_b32_e32 v9, 16, v14
-; SI-NEXT: v_lshrrev_b32_e32 v5, 16, v12
-; SI-NEXT: v_lshr_b64 v[1:2], v[8:9], 16
-; SI-NEXT: v_lshrrev_b32_e32 v7, 16, v13
-; SI-NEXT: v_lshr_b64 v[16:17], v[4:5], 16
-; SI-NEXT: v_lshr_b64 v[2:3], v[6:7], 16
-; SI-NEXT: v_mov_b32_e32 v3, v16
+; SI-NEXT: v_lshrrev_b32_e32 v12, 16, v16
+; SI-NEXT: v_lshr_b64 v[0:1], v[11:12], 16
+; SI-NEXT: v_lshrrev_b32_e32 v10, 16, v15
+; SI-NEXT: v_lshr_b64 v[1:2], v[9:10], 16
+; SI-NEXT: v_lshrrev_b32_e32 v8, 16, v14
+; SI-NEXT: v_lshr_b64 v[2:3], v[7:8], 16
+; SI-NEXT: v_lshrrev_b32_e32 v6, 16, v13
+; SI-NEXT: v_lshr_b64 v[3:4], v[5:6], 16
; SI-NEXT: s_cbranch_execnz .LBB47_3
; SI-NEXT: .LBB47_2: ; %cmp.true
-; SI-NEXT: v_and_b32_e32 v1, 0xffff0000, v15
-; SI-NEXT: v_and_b32_e32 v0, 0xffff0000, v10
+; SI-NEXT: v_and_b32_e32 v1, 0xffff0000, v16
+; SI-NEXT: v_and_b32_e32 v0, 0xffff0000, v11
; SI-NEXT: v_add_f32_e32 v1, 0x40c00000, v1
; SI-NEXT: v_add_f32_e32 v0, 0x40c00000, v0
; SI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; SI-NEXT: v_lshr_b64 v[0:1], v[0:1], 16
-; SI-NEXT: v_and_b32_e32 v2, 0xffff0000, v14
-; SI-NEXT: v_and_b32_e32 v1, 0xffff0000, v8
+; SI-NEXT: v_and_b32_e32 v2, 0xffff0000, v15
+; SI-NEXT: v_and_b32_e32 v1, 0xffff0000, v9
; SI-NEXT: v_add_f32_e32 v2, 0x40c00000, v2
; SI-NEXT: v_add_f32_e32 v1, 0x40c00000, v1
; SI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
-; SI-NEXT: v_and_b32_e32 v5, 0xffff0000, v12
; SI-NEXT: v_lshr_b64 v[1:2], v[1:2], 16
-; SI-NEXT: v_and_b32_e32 v3, 0xffff0000, v13
-; SI-NEXT: v_and_b32_e32 v4, 0xffff0000, v4
-; SI-NEXT: v_add_f32_e32 v5, 0x40c00000, v5
-; SI-NEXT: v_and_b32_e32 v2, 0xffff0000, v6
+; SI-NEXT: v_and_b32_e32 v3, 0xffff0000, v14
+; SI-NEXT: v_and_b32_e32 v2, 0xffff0000, v7
; SI-NEXT: v_add_f32_e32 v3, 0x40c00000, v3
-; SI-NEXT: v_add_f32_e32 v4, 0x40c00000, v4
-; SI-NEXT: v_lshrrev_b32_e32 v5, 16, v5
; SI-NEXT: v_add_f32_e32 v2, 0x40c00000, v2
; SI-NEXT: v_lshrrev_b32_e32 v3, 16, v3
-; SI-NEXT: v_lshr_b64 v[4:5], v[4:5], 16
; SI-NEXT: v_lshr_b64 v[2:3], v[2:3], 16
-; SI-NEXT: v_mov_b32_e32 v3, v4
+; SI-NEXT: v_and_b32_e32 v4, 0xffff0000, v13
+; SI-NEXT: v_and_b32_e32 v3, 0xffff0000, v5
+; SI-NEXT: v_add_f32_e32 v4, 0x40c00000, v4
+; SI-NEXT: v_add_f32_e32 v3, 0x40c00000, v3
+; SI-NEXT: v_lshrrev_b32_e32 v4, 16, v4
+; SI-NEXT: v_lshr_b64 v[3:4], v[3:4], 16
; SI-NEXT: .LBB47_3: ; %end
; SI-NEXT: s_setpc_b64 s[30:31]
; SI-NEXT: .LBB47_4:
@@ -11589,52 +11585,50 @@ define inreg <2 x i64> @bitcast_v8bf16_to_v2i64_scalar(<8 x bfloat> inreg %a, i3
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: s_cmp_lg_u32 s24, 0
-; SI-NEXT: v_mul_f32_e64 v15, 1.0, s17
-; SI-NEXT: v_mul_f32_e64 v10, 1.0, s16
-; SI-NEXT: v_mul_f32_e64 v14, 1.0, s19
-; SI-NEXT: v_mul_f32_e64 v8, 1.0, s18
-; SI-NEXT: v_mul_f32_e64 v13, 1.0, s21
-; SI-NEXT: v_mul_f32_e64 v6, 1.0, s20
-; SI-NEXT: v_mul_f32_e64 v12, 1.0, s23
-; SI-NEXT: v_mul_f32_e64 v4, 1.0, s22
+; SI-NEXT: v_mul_f32_e64 v16, 1.0, s17
+; SI-NEXT: v_mul_f32_e64 v11, 1.0, s16
+; SI-NEXT: v_mul_f32_e64 v15, 1.0, s19
+; SI-NEXT: v_mul_f32_e64 v9, 1.0, s18
+; SI-NEXT: v_mul_f32_e64 v14, 1.0, s21
+; SI-NEXT: v_mul_f32_e64 v7, 1.0, s20
+; SI-NEXT: v_mul_f32_e64 v13, 1.0, s23
+; SI-NEXT: v_mul_f32_e64 v5, 1.0, s22
; SI-NEXT: s_cbranch_scc0 .LBB67_4
; SI-NEXT: ; %bb.1: ; %cmp.false
-; SI-NEXT: v_lshrrev_b32_e32 v11, 16, v15
-; SI-NEXT: v_lshr_b64 v[0:1], v[10:11], 16
-; SI-NEXT: v_lshrrev_b32_e32 v9, 16, v14
-; SI-NEXT: v_lshrrev_b32_e32 v5, 16, v12
-; SI-NEXT: v_lshr_b64 v[1:2], v[8:9], 16
-; SI-NEXT: v_lshrrev_b32_e32 v7, 16, v13
-; SI-NEXT: v_lshr_b64 v[16:17], v[4:5], 16
-; SI-NEXT: v_lshr_b64 v[2:3], v[6:7], 16
-; SI-NEXT: v_mov_b32_e32 v3, v16
+; SI-NEXT: v_lshrrev_b32_e32 v12, 16, v16
+; SI-NEXT: v_lshr_b64 v[0:1], v[11:12], 16
+; SI-NEXT: v_lshrrev_b32_e32 v10, 16, v15
+; SI-NEXT: v_lshr_b64 v[1:2], v[9:10], 16
+; SI-NEXT: v_lshrrev_b32_e32 v8, 16, v14
+; SI-NEXT: v_lshr_b64 v[2:3], v[7:8], 16
+; SI-NEXT: v_lshrrev_b32_e32 v6, 16, v13
+; SI-NEXT: v_lshr_b64 v[3:4], v[5:6], 16
; SI-NEXT: s_cbranch_execnz .LBB67_3
; SI-NEXT: .LBB67_2: ; %cmp.true
-; SI-NEXT: v_and_b32_e32 v1, 0xffff0000, v15
-; SI-NEXT: v_and_b32_e32 v0, 0xffff0000, v10
+; SI-NEXT: v_and_b32_e32 v1, 0xffff0000, v16
+; SI-NEXT: v_and_b32_e32 v0, 0xffff0000, v11
; SI-NEXT: v_add_f32_e32 v1, 0x40c00000, v1
; SI-NEXT: v_add_f32_e32 v0, 0x40c00000, v0
; SI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; SI-NEXT: v_lshr_b64 v[0:1], v[0:1], 16
-; SI-NEXT: v_and_b32_e32 v2, 0xffff0000, v14
-; SI-NEXT: v_and_b32_e32 v1, 0xffff0000, v8
+; SI-NEXT: v_and_b32_e32 v2, 0xffff0000, v15
+; SI-NEXT: v_and_b32_e32 v1, 0xffff0000, v9
; SI-NEXT: v_add_f32_e32 v2, 0x40c00000, v2
; SI-NEXT: v_add_f32_e32 v1, 0x40c00000, v1
; SI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
-; SI-NEXT: v_and_b32_e32 v5, 0xffff0000, v12
; SI-NEXT: v_lshr_b64 v[1:2], v[1:2], 16
-; SI-NEXT: v_and_b32_e32 v3, 0xffff0000, v13
-; SI-NEXT: v_and_b32_e32 v4, 0xffff0000, v4
-; SI-NEXT: v_add_f32_e32 v5, 0x40c00000, v5
-; SI-NEXT: v_and_b32_e32 v2, 0xffff0000, v6
+; SI-NEXT: v_and_b32_e32 v3, 0xffff0000, v14
+; SI-NEXT: v_and_b32_e32 v2, 0xffff0000, v7
; SI-NEXT: v_add_f32_e32 v3, 0x40c00000, v3
-; SI-NEXT: v_add_f32_e32 v4, 0x40c00000, v4
-; SI-NEXT: v_lshrrev_b32_e32 v5, 16, v5
; SI-NEXT: v_add_f32_e32 v2, 0x40c00000, v2
; SI-NEXT: v_lshrrev_b32_e32 v3, 16, v3
-; SI-NEXT: v_lshr_b64 v[4:5], v[4:5], 16
; SI-NEXT: v_lshr_b64 v[2:3], v[2:3], 16
-; SI-NEXT: v_mov_b32_e32 v3, v4
+; SI-NEXT: v_and_b32_e32 v4, 0xffff0000, v13
+; SI-NEXT: v_and_b32_e32 v3, 0xffff0000, v5
+; SI-NEXT: v_add_f32_e32 v4, 0x40c00000, v4
+; SI-NEXT: v_add_f32_e32 v3, 0x40c00000, v3
+; SI-NEXT: v_lshrrev_b32_e32 v4, 16, v4
+; SI-NEXT: v_lshr_b64 v[3:4], v[3:4], 16
; SI-NEXT: .LBB67_3: ; %end
; SI-NEXT: s_setpc_b64 s[30:31]
; SI-NEXT: .LBB67_4:
@@ -15361,52 +15355,50 @@ define inreg <2 x double> @bitcast_v8bf16_to_v2f64_scalar(<8 x bfloat> inreg %a,
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: s_cmp_lg_u32 s24, 0
-; SI-NEXT: v_mul_f32_e64 v15, 1.0, s17
-; SI-NEXT: v_mul_f32_e64 v10, 1.0, s16
-; SI-NEXT: v_mul_f32_e64 v14, 1.0, s19
-; SI-NEXT: v_mul_f32_e64 v8, 1.0, s18
-; SI-NEXT: v_mul_f32_e64 v13, 1.0, s21
-; SI-NEXT: v_mul_f32_e64 v6, 1.0, s20
-; SI-NEXT: v_mul_f32_e64 v12, 1.0, s23
-; SI-NEXT: v_mul_f32_e64 v4, 1.0, s22
+; SI-NEXT: v_mul_f32_e64 v16, 1.0, s17
+; SI-NEXT: v_mul_f32_e64 v11, 1.0, s16
+; SI-NEXT: v_mul_f32_e64 v15, 1.0, s19
+; SI-NEXT: v_mul_f32_e64 v9, 1.0, s18
+; SI-NEXT: v_mul_f32_e64 v14, 1.0, s21
+; SI-NEXT: v_mul_f32_e64 v7, 1.0, s20
+; SI-NEXT: v_mul_f32_e64 v13, 1.0, s23
+; SI-NEXT: v_mul_f32_e64 v5, 1.0, s22
; SI-NEXT: s_cbranch_scc0 .LBB83_4
; SI-NEXT: ; %bb.1: ; %cmp.false
-; SI-NEXT: v_lshrrev_b32_e32 v11, 16, v15
-; SI-NEXT: v_lshr_b64 v[0:1], v[10:11], 16
-; SI-NEXT: v_lshrrev_b32_e32 v9, 16, v14
-; SI-NEXT: v_lshrrev_b32_e32 v5, 16, v12
-; SI-NEXT: v_lshr_b64 v[1:2], v[8:9], 16
-; SI-NEXT: v_lshrrev_b32_e32 v7, 16, v13
-; SI-NEXT: v_lshr_b64 v[16:17], v[4:5], 16
-; SI-NEXT: v_lshr_b64 v[2:3], v[6:7], 16
-; SI-NEXT: v_mov_b32_e32 v3, v16
+; SI-NEXT: v_lshrrev_b32_e32 v12, 16, v16
+; SI-NEXT: v_lshr_b64 v[0:1], v[11:12], 16
+; SI-NEXT: v_lshrrev_b32_e32 v10, 16, v15
+; SI-NEXT: v_lshr_b64 v[1:2], v[9:10], 16
+; SI-NEXT: v_lshrrev_b32_e32 v8, 16, v14
+; SI-NEXT: v_lshr_b64 v[2:3], v[7:8], 16
+; SI-NEXT: v_lshrrev_b32_e32 v6, 16, v13
+; SI-NEXT: v_lshr_b64 v[3:4], v[5:6], 16
; SI-NEXT: s_cbranch_execnz .LBB83_3
; SI-NEXT: .LBB83_2: ; %cmp.true
-; SI-NEXT: v_and_b32_e32 v1, 0xffff0000, v15
-; SI-NEXT: v_and_b32_e32 v0, 0xffff0000, v10
+; SI-NEXT: v_and_b32_e32 v1, 0xffff0000, v16
+; SI-NEXT: v_and_b32_e32 v0, 0xffff0000, v11
; SI-NEXT: v_add_f32_e32 v1, 0x40c00000, v1
; SI-NEXT: v_add_f32_e32 v0, 0x40c00000, v0
; SI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; SI-NEXT: v_lshr_b64 v[0:1], v[0:1], 16
-; SI-NEXT: v_and_b32_e32 v2, 0xffff0000, v14
-; SI-NEXT: v_and_b32_e32 v1, 0xffff0000, v8
+; SI-NEXT: v_and_b32_e32 v2, 0xffff0000, v15
+; SI-NEXT: v_and_b32_e32 v1, 0xffff0000, v9
; SI-NEXT: v_add_f32_e32 v2, 0x40c00000, v2
; SI-NEXT: v_add_f32_e32 v1, 0x40c00000, v1
; SI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
-; SI-NEXT: v_and_b32_e32 v5, 0xffff0000, v12
; SI-NEXT: v_lshr_b64 v[1:2], v[1:2], 16
-; SI-NEXT: v_and_b32_e32 v3, 0xffff0000, v13
-; SI-NEXT: v_and_b32_e32 v4, 0xffff0000, v4
-; SI-NEXT: v_add_f32_e32 v5, 0x40c00000, v5
-; SI-NEXT: v_and_b32_e32 v2, 0xffff0000, v6
+; SI-NEXT: v_and_b32_e32 v3, 0xffff0000, v14
+; SI-NEXT: v_and_b32_e32 v2, 0xffff0000, v7
; SI-NEXT: v_add_f32_e32 v3, 0x40c00000, v3
-; SI-NEXT: v_add_f32_e32 v4, 0x40c00000, v4
-; SI-NEXT: v_lshrrev_b32_e32 v5, 16, v5
; SI-NEXT: v_add_f32_e32 v2, 0x40c00000, v2
; SI-NEXT: v_lshrrev_b32_e32 v3, 16, v3
-; SI-NEXT: v_lshr_b64 v[4:5], v[4:5], 16
; SI-NEXT: v_lshr_b64 v[2:3], v[2:3], 16
-; SI-NEXT: v_mov_b32_e32 v3, v4
+; SI-NEXT: v_and_b32_e32 v4, 0xffff0000, v13
+; SI-NEXT: v_and_b32_e32 v3, 0xffff0000, v5
+; SI-NEXT: v_add_f32_e32 v4, 0x40c00000, v4
+; SI-NEXT: v_add_f32_e32 v3, 0x40c00000, v3
+; SI-NEXT: v_lshrrev_b32_e32 v4, 16, v4
+; SI-NEXT: v_lshr_b64 v[3:4], v[3:4], 16
; SI-NEXT: .LBB83_3: ; %end
; SI-NEXT: s_setpc_b64 s[30:31]
; SI-NEXT: .LBB83_4:
diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.256bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.256bit.ll
index 155ec568a65d3..b846e0ee0a12f 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.256bit.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.256bit.ll
@@ -4052,92 +4052,90 @@ define inreg <8 x i32> @bitcast_v16bf16_to_v8i32_scalar(<16 x bfloat> inreg %a,
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2
; SI-NEXT: s_and_b64 s[4:5], vcc, exec
-; SI-NEXT: v_mul_f32_e64 v31, 1.0, s17
-; SI-NEXT: v_mul_f32_e64 v22, 1.0, s16
-; SI-NEXT: v_mul_f32_e64 v30, 1.0, s19
-; SI-NEXT: v_mul_f32_e64 v20, 1.0, s18
-; SI-NEXT: v_mul_f32_e64 v29, 1.0, s21
-; SI-NEXT: v_mul_f32_e64 v18, 1.0, s20
-; SI-NEXT: v_mul_f32_e64 v28, 1.0, s23
-; SI-NEXT: v_mul_f32_e64 v16, 1.0, s22
-; SI-NEXT: v_mul_f32_e64 v27, 1.0, s25
-; SI-NEXT: v_mul_f32_e64 v14, 1.0, s24
-; SI-NEXT: v_mul_f32_e64 v26, 1.0, s27
-; SI-NEXT: v_mul_f32_e64 v25, 1.0, s29
-; SI-NEXT: v_mul_f32_e32 v24, 1.0, v1
-; SI-NEXT: v_mul_f32_e32 v8, 1.0, v0
-; SI-NEXT: v_mul_f32_e64 v12, 1.0, s26
-; SI-NEXT: v_mul_f32_e64 v10, 1.0, s28
+; SI-NEXT: v_mul_f32_e64 v32, 1.0, s17
+; SI-NEXT: v_mul_f32_e64 v23, 1.0, s16
+; SI-NEXT: v_mul_f32_e64 v31, 1.0, s19
+; SI-NEXT: v_mul_f32_e64 v21, 1.0, s18
+; SI-NEXT: v_mul_f32_e64 v30, 1.0, s21
+; SI-NEXT: v_mul_f32_e64 v19, 1.0, s20
+; SI-NEXT: v_mul_f32_e64 v29, 1.0, s23
+; SI-NEXT: v_mul_f32_e64 v17, 1.0, s22
+; SI-NEXT: v_mul_f32_e64 v28, 1.0, s25
+; SI-NEXT: v_mul_f32_e64 v15, 1.0, s24
+; SI-NEXT: v_mul_f32_e64 v27, 1.0, s27
+; SI-NEXT: v_mul_f32_e64 v26, 1.0, s29
+; SI-NEXT: v_mul_f32_e32 v25, 1.0, v1
+; SI-NEXT: v_mul_f32_e32 v9, 1.0, v0
+; SI-NEXT: v_mul_f32_e64 v13, 1.0, s26
+; SI-NEXT: v_mul_f32_e64 v11, 1.0, s28
; SI-NEXT: s_cbranch_scc0 .LBB23_4
; SI-NEXT: ; %bb.1: ; %cmp.false
-; SI-NEXT: v_lshrrev_b32_e32 v23, 16, v31
-; SI-NEXT: v_lshrrev_b32_e32 v21, 16, v30
-; SI-NEXT: v_lshr_b64 v[0:1], v[22:23], 16
-; SI-NEXT: v_lshr_b64 v[1:2], v[20:21], 16
-; SI-NEXT: v_lshrrev_b32_e32 v19, 16, v29
-; SI-NEXT: v_lshr_b64 v[2:3], v[18:19], 16
-; SI-NEXT: v_lshrrev_b32_e32 v17, 16, v28
-; SI-NEXT: v_lshr_b64 v[3:4], v[16:17], 16
-; SI-NEXT: v_lshrrev_b32_e32 v15, 16, v27
-; SI-NEXT: v_lshr_b64 v[4:5], v[14:15], 16
-; SI-NEXT: v_lshrrev_b32_e32 v13, 16, v26
-; SI-NEXT: v_lshrrev_b32_e32 v9, 16, v24
-; SI-NEXT: v_lshr_b64 v[5:6], v[12:13], 16
-; SI-NEXT: v_lshrrev_b32_e32 v11, 16, v25
-; SI-NEXT: v_lshr_b64 v[32:33], v[8:9], 16
-; SI-NEXT: v_lshr_b64 v[6:7], v[10:11], 16
-; SI-NEXT: v_mov_b32_e32 v7, v32
+; SI-NEXT: v_lshrrev_b32_e32 v24, 16, v32
+; SI-NEXT: v_lshrrev_b32_e32 v22, 16, v31
+; SI-NEXT: v_lshr_b64 v[0:1], v[23:24], 16
+; SI-NEXT: v_lshr_b64 v[1:2], v[21:22], 16
+; SI-NEXT: v_lshrrev_b32_e32 v20, 16, v30
+; SI-NEXT: v_lshr_b64 v[2:3], v[19:20], 16
+; SI-NEXT: v_lshrrev_b32_e32 v18, 16, v29
+; SI-NEXT: v_lshr_b64 v[3:4], v[17:18], 16
+; SI-NEXT: v_lshrrev_b32_e32 v16, 16, v28
+; SI-NEXT: v_lshr_b64 v[4:5], v[15:16], 16
+; SI-NEXT: v_lshrrev_b32_e32 v14, 16, v27
+; SI-NEXT: v_lshr_b64 v[5:6], v[13:14], 16
+; SI-NEXT: v_lshrrev_b32_e32 v12, 16, v26
+; SI-NEXT: v_lshr_b64 v[6:7], v[11:12], 16
+; SI-NEXT: v_lshrrev_b32_e32 v10, 16, v25
+; SI-NEXT: v_lshr_b64 v[7:8], v[9:10], 16
; SI-NEXT: s_cbranch_execnz .LBB23_3
; SI-NEXT: .LBB23_2: ; %cmp.true
-; SI-NEXT: v_and_b32_e32 v1, 0xffff0000, v31
-; SI-NEXT: v_and_b32_e32 v0, 0xffff0000, v22
+; SI-NEXT: v_and_b32_e32 v1, 0xffff0000, v32
+; SI-NEXT: v_and_b32_e32 v0, 0xffff0000, v23
; SI-NEXT: v_add_f32_e32 v1, 0x40c00000, v1
-; SI-NEXT: v_and_b32_e32 v3, 0xffff0000, v30
+; SI-NEXT: v_and_b32_e32 v3, 0xffff0000, v31
; SI-NEXT: v_add_f32_e32 v0, 0x40c00000, v0
; SI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
-; SI-NEXT: v_and_b32_e32 v2, 0xffff0000, v20
+; SI-NEXT: v_and_b32_e32 v2, 0xffff0000, v21
; SI-NEXT: v_add_f32_e32 v3, 0x40c00000, v3
; SI-NEXT: v_add_f32_e32 v2, 0x40c00000, v2
; SI-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; SI-NEXT: v_lshr_b64 v[0:1], v[0:1], 16
; SI-NEXT: v_lshr_b64 v[1:2], v[2:3], 16
-; SI-NEXT: v_and_b32_e32 v3, 0xffff0000, v29
-; SI-NEXT: v_and_b32_e32 v2, 0xffff0000, v18
+; SI-NEXT: v_and_b32_e32 v3, 0xffff0000, v30
+; SI-NEXT: v_and_b32_e32 v2, 0xffff0000, v19
; SI-NEXT: v_add_f32_e32 v3, 0x40c00000, v3
; SI-NEXT: v_add_f32_e32 v2, 0x40c00000, v2
; SI-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; SI-NEXT: v_lshr_b64 v[2:3], v[2:3], 16
-; SI-NEXT: v_and_b32_e32 v4, 0xffff0000, v28
-; SI-NEXT: v_and_b32_e32 v3, 0xffff0000, v16
+; SI-NEXT: v_and_b32_e32 v4, 0xffff0000, v29
+; SI-NEXT: v_and_b32_e32 v3, 0xffff0000, v17
; SI-NEXT: v_add_f32_e32 v4, 0x40c00000, v4
; SI-NEXT: v_add_f32_e32 v3, 0x40c00000, v3
; SI-NEXT: v_lshrrev_b32_e32 v4, 16, v4
; SI-NEXT: v_lshr_b64 v[3:4], v[3:4], 16
-; SI-NEXT: v_and_b32_e32 v5, 0xffff0000, v27
-; SI-NEXT: v_and_b32_e32 v4, 0xffff0000, v14
+; SI-NEXT: v_and_b32_e32 v5, 0xffff0000, v28
+; SI-NEXT: v_and_b32_e32 v4, 0xffff0000, v15
; SI-NEXT: v_add_f32_e32 v5, 0x40c00000, v5
; SI-NEXT: v_add_f32_e32 v4, 0x40c00000, v4
; SI-NEXT: v_lshrrev_b32_e32 v5, 16, v5
; SI-NEXT: v_lshr_b64 v[4:5], v[4:5], 16
-; SI-NEXT: v_and_b32_e32 v6, 0xffff0000, v26
-; SI-NEXT: v_and_b32_e32 v5, 0xffff0000, v12
+; SI-NEXT: v_and_b32_e32 v6, 0xffff0000, v27
+; SI-NEXT: v_and_b32_e32 v5, 0xffff0000, v13
; SI-NEXT: v_add_f32_e32 v6, 0x40c00000, v6
; SI-NEXT: v_add_f32_e32 v5, 0x40c00000, v5
; SI-NEXT: v_lshrrev_b32_e32 v6, 16, v6
-; SI-NEXT: v_and_b32_e32 v9, 0xffff0000, v24
; SI-NEXT: v_lshr_b64 v[5:6], v[5:6], 16
-; SI-NEXT: v_and_b32_e32 v7, 0xffff0000, v25
-; SI-NEXT: v_and_b32_e32 v8, 0xffff0000, v8
-; SI-NEXT: v_add_f32_e32 v9, 0x40c00000, v9
-; SI-NEXT: v_and_b32_e32 v6, 0xffff0000, v10
+; SI-NEXT: v_and_b32_e32 v7, 0xffff0000, v26
+; SI-NEXT: v_and_b32_e32 v6, 0xffff0000, v11
; SI-NEXT: v_add_f32_e32 v7, 0x40c00000, v7
-; SI-NEXT: v_add_f32_e32 v8, 0x40c00000, v8
-; SI-NEXT: v_lshrrev_b32_e32 v9, 16, v9
; SI-NEXT: v_add_f32_e32 v6, 0x40c00000, v6
; SI-NEXT: v_lshrrev_b32_e32 v7, 16, v7
-; SI-NEXT: v_lshr_b64 v[8:9], v[8:9], 16
; SI-NEXT: v_lshr_b64 v[6:7], v[6:7], 16
-; SI-NEXT: v_mov_b32_e32 v7, v8
+; SI-NEXT: v_and_b32_e32 v8, 0xffff0000, v25
+; SI-NEXT: v_and_b32_e32 v7, 0xffff0000, v9
+; SI-NEXT: v_add_f32_e32 v8, 0x40c00000, v8
+; SI-NEXT: v_add_f32_e32 v7, 0x40c00000, v7
+; SI-NEXT: v_lshrrev_b32_e32 v8, 16, v8
+; SI-NEXT: v_lshr_b64 v[7:8], v[7:8], 16
; SI-NEXT: .LBB23_3: ; %end
; SI-NEXT: s_setpc_b64 s[30:31]
; SI-NEXT: .LBB23_4:
@@ -11209,92 +11207,90 @@ define inreg <8 x float> @bitcast_v16bf16_to_v8f32_scalar(<16 x bfloat> inreg %a
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2
; SI-NEXT: s_and_b64 s[4:5], vcc, exec
-; SI-NEXT: v_mul_f32_e64 v31, 1.0, s17
-; SI-NEXT: v_mul_f32_e64 v22, 1.0, s16
-; SI-NEXT: v_mul_f32_e64 v30, 1.0, s19
-; SI-NEXT: v_mul_f32_e64 v20, 1.0, s18
-; SI-NEXT: v_mul_f32_e64 v29, 1.0, s21
-; SI-NEXT: v_mul_f32_e64 v18, 1.0, s20
-; SI-NEXT: v_mul_f32_e64 v28, 1.0, s23
-; SI-NEXT: v_mul_f32_e64 v16, 1.0, s22
-; SI-NEXT: v_mul_f32_e64 v27, 1.0, s25
-; SI-NEXT: v_mul_f32_e64 v14, 1.0, s24
-; SI-NEXT: v_mul_f32_e64 v26, 1.0, s27
-; SI-NEXT: v_mul_f32_e64 v25, 1.0, s29
-; SI-NEXT: v_mul_f32_e32 v24, 1.0, v1
-; SI-NEXT: v_mul_f32_e32 v8, 1.0, v0
-; SI-NEXT: v_mul_f32_e64 v12, 1.0, s26
-; SI-NEXT: v_mul_f32_e64 v10, 1.0, s28
+; SI-NEXT: v_mul_f32_e64 v32, 1.0, s17
+; SI-NEXT: v_mul_f32_e64 v23, 1.0, s16
+; SI-NEXT: v_mul_f32_e64 v31, 1.0, s19
+; SI-NEXT: v_mul_f32_e64 v21, 1.0, s18
+; SI-NEXT: v_mul_f32_e64 v30, 1.0, s21
+; SI-NEXT: v_mul_f32_e64 v19, 1.0, s20
+; SI-NEXT: v_mul_f32_e64 v29, 1.0, s23
+; SI-NEXT: v_mul_f32_e64 v17, 1.0, s22
+; SI-NEXT: v_mul_f32_e64 v28, 1.0, s25
+; SI-NEXT: v_mul_f32_e64 v15, 1.0, s24
+; SI-NEXT: v_mul_f32_e64 v27, 1.0, s27
+; SI-NEXT: v_mul_f32_e64 v26, 1.0, s29
+; SI-NEXT: v_mul_f32_e32 v25, 1.0, v1
+; SI-NEXT: v_mul_f32_e32 v9, 1.0, v0
+; SI-NEXT: v_mul_f32_e64 v13, 1.0, s26
+; SI-NEXT: v_mul_f32_e64 v11, 1.0, s28
; SI-NEXT: s_cbranch_scc0 .LBB47_4
; SI-NEXT: ; %bb.1: ; %cmp.false
-; SI-NEXT: v_lshrrev_b32_e32 v23, 16, v31
-; SI-NEXT: v_lshrrev_b32_e32 v21, 16, v30
-; SI-NEXT: v_lshr_b64 v[0:1], v[22:23], 16
-; SI-NEXT: v_lshr_b64 v[1:2], v[20:21], 16
-; SI-NEXT: v_lshrrev_b32_e32 v19, 16, v29
-; SI-NEXT: v_lshr_b64 v[2:3], v[18:19], 16
-; SI-NEXT: v_lshrrev_b32_e32 v17, 16, v28
-; SI-NEXT: v_lshr_b64 v[3:4], v[16:17], 16
-; SI-NEXT: v_lshrrev_b32_e32 v15, 16, v27
-; SI-NEXT: v_lshr_b64 v[4:5], v[14:15], 16
-; SI-NEXT: v_lshrrev_b32_e32 v13, 16, v26
-; SI-NEXT: v_lshrrev_b32_e32 v9, 16, v24
-; SI-NEXT: v_lshr_b64 v[5:6], v[12:13], 16
-; SI-NEXT: v_lshrrev_b32_e32 v11, 16, v25
-; SI-NEXT: v_lshr_b64 v[32:33], v[8:9], 16
-; SI-NEXT: v_lshr_b64 v[6:7], v[10:11], 16
-; SI-NEXT: v_mov_b32_e32 v7, v32
+; SI-NEXT: v_lshrrev_b32_e32 v24, 16, v32
+; SI-NEXT: v_lshrrev_b32_e32 v22, 16, v31
+; SI-NEXT: v_lshr_b64 v[0:1], v[23:24], 16
+; SI-NEXT: v_lshr_b64 v[1:2], v[21:22], 16
+; SI-NEXT: v_lshrrev_b32_e32 v20, 16, v30
+; SI-NEXT: v_lshr_b64 v[2:3], v[19:20], 16
+; SI-NEXT: v_lshrrev_b32_e32 v18, 16, v29
+; SI-NEXT: v_lshr_b64 v[3:4], v[17:18], 16
+; SI-NEXT: v_lshrrev_b32_e32 v16, 16, v28
+; SI-NEXT: v_lshr_b64 v[4:5], v[15:16], 16
+; SI-NEXT: v_lshrrev_b32_e32 v14, 16, v27
+; SI-NEXT: v_lshr_b64 v[5:6], v[13:14], 16
+; SI-NEXT: v_lshrrev_b32_e32 v12, 16, v26
+; SI-NEXT: v_lshr_b64 v[6:7], v[11:12], 16
+; SI-NEXT: v_lshrrev_b32_e32 v10, 16, v25
+; SI-NEXT: v_lshr_b64 v[7:8], v[9:10], 16
; SI-NEXT: s_cbranch_execnz .LBB47_3
; SI-NEXT: .LBB47_2: ; %cmp.true
-; SI-NEXT: v_and_b32_e32 v1, 0xffff0000, v31
-; SI-NEXT: v_and_b32_e32 v0, 0xffff0000, v22
+; SI-NEXT: v_and_b32_e32 v1, 0xffff0000, v32
+; SI-NEXT: v_and_b32_e32 v0, 0xffff0000, v23
; SI-NEXT: v_add_f32_e32 v1, 0x40c00000, v1
-; SI-NEXT: v_and_b32_e32 v3, 0xffff0000, v30
+; SI-NEXT: v_and_b32_e32 v3, 0xffff0000, v31
; SI-NEXT: v_add_f32_e32 v0, 0x40c00000, v0
; SI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
-; SI-NEXT: v_and_b32_e32 v2, 0xffff0000, v20
+; SI-NEXT: v_and_b32_e32 v2, 0xffff0000, v21
; SI-NEXT: v_add_f32_e32 v3, 0x40c00000, v3
; SI-NEXT: v_add_f32_e32 v2, 0x40c00000, v2
; SI-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; SI-NEXT: v_lshr_b64 v[0:1], v[0:1], 16
; SI-NEXT: v_lshr_b64 v[1:2], v[2:3], 16
-; SI-NEXT: v_and_b32_e32 v3, 0xffff0000, v29
-; SI-NEXT: v_and_b32_e32 v2, 0xffff0000, v18
+; SI-NEXT: v_and_b32_e32 v3, 0xffff0000, v30
+; SI-NEXT: v_and_b32_e32 v2, 0xffff0000, v19
; SI-NEXT: v_add_f32_e32 v3, 0x40c00000, v3
; SI-NEXT: v_add_f32_e32 v2, 0x40c00000, v2
; SI-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; SI-NEXT: v_lshr_b64 v[2:3], v[2:3], 16
-; SI-NEXT: v_and_b32_e32 v4, 0xffff0000, v28
-; SI-NEXT: v_and_b32_e32 v3, 0xffff0000, v16
+; SI-NEXT: v_and_b32_e32 v4, 0xffff0000, v29
+; SI-NEXT: v_and_b32_e32 v3, 0xffff0000, v17
; SI-NEXT: v_add_f32_e32 v4, 0x40c00000, v4
; SI-NEXT: v_add_f32_e32 v3, 0x40c00000, v3
; SI-NEXT: v_lshrrev_b32_e32 v4, 16, v4
; SI-NEXT: v_lshr_b64 v[3:4], v[3:4], 16
-; SI-NEXT: v_and_b32_e32 v5, 0xffff0000, v27
-; SI-NEXT: v_and_b32_e32 v4, 0xffff0000, v14
+; SI-NEXT: v_and_b32_e32 v5, 0xffff0000, v28
+; SI-NEXT: v_and_b32_e32 v4, 0xffff0000, v15
; SI-NEXT: v_add_f32_e32 v5, 0x40c00000, v5
; SI-NEXT: v_add_f32_e32 v4, 0x40c00000, v4
; SI-NEXT: v_lshrrev_b32_e32 v5, 16, v5
; SI-NEXT: v_lshr_b64 v[4:5], v[4:5], 16
-; SI-NEXT: v_and_b32_e32 v6, 0xffff0000, v26
-; SI-NEXT: v_and_b32_e32 v5, 0xffff0000, v12
+; SI-NEXT: v_and_b32_e32 v6, 0xffff0000, v27
+; SI-NEXT: v_and_b32_e32 v5, 0xffff0000, v13
; SI-NEXT: v_add_f32_e32 v6, 0x40c00000, v6
; SI-NEXT: v_add_f32_e32 v5, 0x40c00000, v5
; SI-NEXT: v_lshrrev_b32_e32 v6, 16, v6
-; SI-NEXT: v_and_b32_e32 v9, 0xffff0000, v24
; SI-NEXT: v_lshr_b64 v[5:6], v[5:6], 16
-; SI-NEXT: v_and_b32_e32 v7, 0xffff0000, v25
-; SI-NEXT: v_and_b32_e32 v8, 0xffff0000, v8
-; SI-NEXT: v_add_f32_e32 v9, 0x40c00000, v9
-; SI-NEXT: v_and_b32_e32 v6, 0xffff0000, v10
+; SI-NEXT: v_and_b32_e32 v7, 0xffff0000, v26
+; SI-NEXT: v_and_b32_e32 v6, 0xffff0000, v11
; SI-NEXT: v_add_f32_e32 v7, 0x40c00000, v7
-; SI-NEXT: v_add_f32_e32 v8, 0x40c00000, v8
-; SI-NEXT: v_lshrrev_b32_e32 v9, 16, v9
; SI-NEXT: v_add_f32_e32 v6, 0x40c00000, v6
; SI-NEXT: v_lshrrev_b32_e32 v7, 16, v7
-; SI-NEXT: v_lshr_b64 v[8:9], v[8:9], 16
; SI-NEXT: v_lshr_b64 v[6:7], v[6:7], 16
-; SI-NEXT: v_mov_b32_e32 v7, v8
+; SI-NEXT: v_and_b32_e32 v8, 0xffff0000, v25
+; SI-NEXT: v_and_b32_e32 v7, 0xffff0000, v9
+; SI-NEXT: v_add_f32_e32 v8, 0x40c00000, v8
+; SI-NEXT: v_add_f32_e32 v7, 0x40c00000, v7
+; SI-NEXT: v_lshrrev_b32_e32 v8, 16, v8
+; SI-NEXT: v_lshr_b64 v[7:8], v[7:8], 16
; SI-NEXT: .LBB47_3: ; %end
; SI-NEXT: s_setpc_b64 s[30:31]
; SI-NEXT: .LBB47_4:
@@ -17934,92 +17930,90 @@ define inreg <4 x i64> @bitcast_v16bf16_to_v4i64_scalar(<16 x bfloat> inreg %a,
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2
; SI-NEXT: s_and_b64 s[4:5], vcc, exec
-; SI-NEXT: v_mul_f32_e64 v31, 1.0, s17
-; SI-NEXT: v_mul_f32_e64 v22, 1.0, s16
-; SI-NEXT: v_mul_f32_e64 v30, 1.0, s19
-; SI-NEXT: v_mul_f32_e64 v20, 1.0, s18
-; SI-NEXT: v_mul_f32_e64 v29, 1.0, s21
-; SI-NEXT: v_mul_f32_e64 v18, 1.0, s20
-; SI-NEXT: v_mul_f32_e64 v28, 1.0, s23
-; SI-NEXT: v_mul_f32_e64 v16, 1.0, s22
-; SI-NEXT: v_mul_f32_e64 v27, 1.0, s25
-; SI-NEXT: v_mul_f32_e64 v14, 1.0, s24
-; SI-NEXT: v_mul_f32_e64 v26, 1.0, s27
-; SI-NEXT: v_mul_f32_e64 v25, 1.0, s29
-; SI-NEXT: v_mul_f32_e32 v24, 1.0, v1
-; SI-NEXT: v_mul_f32_e32 v8, 1.0, v0
-; SI-NEXT: v_mul_f32_e64 v12, 1.0, s26
-; SI-NEXT: v_mul_f32_e64 v10, 1.0, s28
+; SI-NEXT: v_mul_f32_e64 v32, 1.0, s17
+; SI-NEXT: v_mul_f32_e64 v23, 1.0, s16
+; SI-NEXT: v_mul_f32_e64 v31, 1.0, s19
+; SI-NEXT: v_mul_f32_e64 v21, 1.0, s18
+; SI-NEXT: v_mul_f32_e64 v30, 1.0, s21
+; SI-NEXT: v_mul_f32_e64 v19, 1.0, s20
+; SI-NEXT: v_mul_f32_e64 v29, 1.0, s23
+; SI-NEXT: v_mul_f32_e64 v17, 1.0, s22
+; SI-NEXT: v_mul_f32_e64 v28, 1.0, s25
+; SI-NEXT: v_mul_f32_e64 v15, 1.0, s24
+; SI-NEXT: v_mul_f32_e64 v27, 1.0, s27
+; SI-NEXT: v_mul_f32_e64 v26, 1.0, s29
+; SI-NEXT: v_mul_f32_e32 v25, 1.0, v1
+; SI-NEXT: v_mul_f32_e32 v9, 1.0, v0
+; SI-NEXT: v_mul_f32_e64 v13, 1.0, s26
+; SI-NEXT: v_mul_f32_e64 v11, 1.0, s28
; SI-NEXT: s_cbranch_scc0 .LBB67_4
; SI-NEXT: ; %bb.1: ; %cmp.false
-; SI-NEXT: v_lshrrev_b32_e32 v23, 16, v31
-; SI-NEXT: v_lshrrev_b32_e32 v21, 16, v30
-; SI-NEXT: v_lshr_b64 v[0:1], v[22:23], 16
-; SI-NEXT: v_lshr_b64 v[1:2], v[20:21], 16
-; SI-NEXT: v_lshrrev_b32_e32 v19, 16, v29
-; SI-NEXT: v_lshr_b64 v[2:3], v[18:19], 16
-; SI-NEXT: v_lshrrev_b32_e32 v17, 16, v28
-; SI-NEXT: v_lshr_b64 v[3:4], v[16:17], 16
-; SI-NEXT: v_lshrrev_b32_e32 v15, 16, v27
-; SI-NEXT: v_lshr_b64 v[4:5], v[14:15], 16
-; SI-NEXT: v_lshrrev_b32_e32 v13, 16, v26
-; SI-NEXT: v_lshrrev_b32_e32 v9, 16, v24
-; SI-NEXT: v_lshr_b64 v[5:6], v[12:13], 16
-; SI-NEXT: v_lshrrev_b32_e32 v11, 16, v25
-; SI-NEXT: v_lshr_b64 v[32:33], v[8:9], 16
-; SI-NEXT: v_lshr_b64 v[6:7], v[10:11], 16
-; SI-NEXT: v_mov_b32_e32 v7, v32
+; SI-NEXT: v_lshrrev_b32_e32 v24, 16, v32
+; SI-NEXT: v_lshrrev_b32_e32 v22, 16, v31
+; SI-NEXT: v_lshr_b64 v[0:1], v[23:24], 16
+; SI-NEXT: v_lshr_b64 v[1:2], v[21:22], 16
+; SI-NEXT: v_lshrrev_b32_e32 v20, 16, v30
+; SI-NEXT: v_lshr_b64 v[2:3], v[19:20], 16
+; SI-NEXT: v_lshrrev_b32_e32 v18, 16, v29
+; SI-NEXT: v_lshr_b64 v[3:4], v[17:18], 16
+; SI-NEXT: v_lshrrev_b32_e32 v16, 16, v28
+; SI-NEXT: v_lshr_b64 v[4:5], v[15:16], 16
+; SI-NEXT: v_lshrrev_b32_e32 v14, 16, v27
+; SI-NEXT: v_lshr_b64 v[5:6], v[13:14], 16
+; SI-NEXT: v_lshrrev_b32_e32 v12, 16, v26
+; SI-NEXT: v_lshr_b64 v[6:7], v[11:12], 16
+; SI-NEXT: v_lshrrev_b32_e32 v10, 16, v25
+; SI-NEXT: v_lshr_b64 v[7:8], v[9:10], 16
; SI-NEXT: s_cbranch_execnz .LBB67_3
; SI-NEXT: .LBB67_2: ; %cmp.true
-; SI-NEXT: v_and_b32_e32 v1, 0xffff0000, v31
-; SI-NEXT: v_and_b32_e32 v0, 0xffff0000, v22
+; SI-NEXT: v_and_b32_e32 v1, 0xffff0000, v32
+; SI-NEXT: v_and_b32_e32 v0, 0xffff0000, v23
; SI-NEXT: v_add_f32_e32 v1, 0x40c00000, v1
-; SI-NEXT: v_and_b32_e32 v3, 0xffff0000, v30
+; SI-NEXT: v_and_b32_e32 v3, 0xffff0000, v31
; SI-NEXT: v_add_f32_e32 v0, 0x40c00000, v0
; SI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
-; SI-NEXT: v_and_b32_e32 v2, 0xffff0000, v20
+; SI-NEXT: v_and_b32_e32 v2, 0xffff0000, v21
; SI-NEXT: v_add_f32_e32 v3, 0x40c00000, v3
; SI-NEXT: v_add_f32_e32 v2, 0x40c00000, v2
; SI-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; SI-NEXT: v_lshr_b64 v[0:1], v[0:1], 16
; SI-NEXT: v_lshr_b64 v[1:2], v[2:3], 16
-; SI-NEXT: v_and_b32_e32 v3, 0xffff0000, v29
-; SI-NEXT: v_and_b32_e32 v2, 0xffff0000, v18
+; SI-NEXT: v_and_b32_e32 v3, 0xffff0000, v30
+; SI-NEXT: v_and_b32_e32 v2, 0xffff0000, v19
; SI-NEXT: v_add_f32_e32 v3, 0x40c00000, v3
; SI-NEXT: v_add_f32_e32 v2, 0x40c00000, v2
; SI-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; SI-NEXT: v_lshr_b64 v[2:3], v[2:3], 16
-; SI-NEXT: v_and_b32_e32 v4, 0xffff0000, v28
-; SI-NEXT: v_and_b32_e32 v3, 0xffff0000, v16
+; SI-NEXT: v_and_b32_e32 v4, 0xffff0000, v29
+; SI-NEXT: v_and_b32_e32 v3, 0xffff0000, v17
; SI-NEXT: v_add_f32_e32 v4, 0x40c00000, v4
; SI-NEXT: v_add_f32_e32 v3, 0x40c00000, v3
; SI-NEXT: v_lshrrev_b32_e32 v4, 16, v4
; SI-NEXT: v_lshr_b64 v[3:4], v[3:4], 16
-; SI-NEXT: v_and_b32_e32 v5, 0xffff0000, v27
-; SI-NEXT: v_and_b32_e32 v4, 0xffff0000, v14
+; SI-NEXT: v_and_b32_e32 v5, 0xffff0000, v28
+; SI-NEXT: v_and_b32_e32 v4, 0xffff0000, v15
; SI-NEXT: v_add_f32_e32 v5, 0x40c00000, v5
; SI-NEXT: v_add_f32_e32 v4, 0x40c00000, v4
; SI-NEXT: v_lshrrev_b32_e32 v5, 16, v5
; SI-NEXT: v_lshr_b64 v[4:5], v[4:5], 16
-; SI-NEXT: v_and_b32_e32 v6, 0xffff0000, v26
-; SI-NEXT: v_and_b32_e32 v5, 0xffff0000, v12
+; SI-NEXT: v_and_b32_e32 v6, 0xffff0000, v27
+; SI-NEXT: v_and_b32_e32 v5, 0xffff0000, v13
; SI-NEXT: v_add_f32_e32 v6, 0x40c00000, v6
; SI-NEXT: v_add_f32_e32 v5, 0x40c00000, v5
; SI-NEXT: v_lshrrev_b32_e32 v6, 16, v6
-; SI-NEXT: v_and_b32_e32 v9, 0xffff0000, v24
; SI-NEXT: v_lshr_b64 v[5:6], v[5:6], 16
-; SI-NEXT: v_and_b32_e32 v7, 0xffff0000, v25
-; SI-NEXT: v_and_b32_e32 v8, 0xffff0000, v8
-; SI-NEXT: v_add_f32_e32 v9, 0x40c00000, v9
-; SI-NEXT: v_and_b32_e32 v6, 0xffff0000, v10
+; SI-NEXT: v_and_b32_e32 v7, 0xffff0000, v26
+; SI-NEXT: v_and_b32_e32 v6, 0xffff0000, v11
; SI-NEXT: v_add_f32_e32 v7, 0x40c00000, v7
-; SI-NEXT: v_add_f32_e32 v8, 0x40c00000, v8
-; SI-NEXT: v_lshrrev_b32_e32 v9, 16, v9
; SI-NEXT: v_add_f32_e32 v6, 0x40c00000, v6
; SI-NEXT: v_lshrrev_b32_e32 v7, 16, v7
-; SI-NEXT: v_lshr_b64 v[8:9], v[8:9], 16
; SI-NEXT: v_lshr_b64 v[6:7], v[6:7], 16
-; SI-NEXT: v_mov_b32_e32 v7, v8
+; SI-NEXT: v_and_b32_e32 v8, 0xffff0000, v25
+; SI-NEXT: v_and_b32_e32 v7, 0xffff0000, v9
+; SI-NEXT: v_add_f32_e32 v8, 0x40c00000, v8
+; SI-NEXT: v_add_f32_e32 v7, 0x40c00000, v7
+; SI-NEXT: v_lshrrev_b32_e32 v8, 16, v8
+; SI-NEXT: v_lshr_b64 v[7:8], v[7:8], 16
; SI-NEXT: .LBB67_3: ; %end
; SI-NEXT: s_setpc_b64 s[30:31]
; SI-NEXT: .LBB67_4:
@@ -24107,92 +24101,90 @@ define inreg <4 x double> @bitcast_v16bf16_to_v4f64_scalar(<16 x bfloat> inreg %
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2
; SI-NEXT: s_and_b64 s[4:5], vcc, exec
-; SI-NEXT: v_mul_f32_e64 v31, 1.0, s17
-; SI-NEXT: v_mul_f32_e64 v22, 1.0, s16
-; SI-NEXT: v_mul_f32_e64 v30, 1.0, s19
-; SI-NEXT: v_mul_f32_e64 v20, 1.0, s18
-; SI-NEXT: v_mul_f32_e64 v29, 1.0, s21
-; SI-NEXT: v_mul_f32_e64 v18, 1.0, s20
-; SI-NEXT: v_mul_f32_e64 v28, 1.0, s23
-; SI-NEXT: v_mul_f32_e64 v16, 1.0, s22
-; SI-NEXT: v_mul_f32_e64 v27, 1.0, s25
-; SI-NEXT: v_mul_f32_e64 v14, 1.0, s24
-; SI-NEXT: v_mul_f32_e64 v26, 1.0, s27
-; SI-NEXT: v_mul_f32_e64 v25, 1.0, s29
-; SI-NEXT: v_mul_f32_e32 v24, 1.0, v1
-; SI-NEXT: v_mul_f32_e32 v8, 1.0, v0
-; SI-NEXT: v_mul_f32_e64 v12, 1.0, s26
-; SI-NEXT: v_mul_f32_e64 v10, 1.0, s28
+; SI-NEXT: v_mul_f32_e64 v32, 1.0, s17
+; SI-NEXT: v_mul_f32_e64 v23, 1.0, s16
+; SI-NEXT: v_mul_f32_e64 v31, 1.0, s19
+; SI-NEXT: v_mul_f32_e64 v21, 1.0, s18
+; SI-NEXT: v_mul_f32_e64 v30, 1.0, s21
+; SI-NEXT: v_mul_f32_e64 v19, 1.0, s20
+; SI-NEXT: v_mul_f32_e64 v29, 1.0, s23
+; SI-NEXT: v_mul_f32_e64 v17, 1.0, s22
+; SI-NEXT: v_mul_f32_e64 v28, 1.0, s25
+; SI-NEXT: v_mul_f32_e64 v15, 1.0, s24
+; SI-NEXT: v_mul_f32_e64 v27, 1.0, s27
+; SI-NEXT: v_mul_f32_e64 v26, 1.0, s29
+; SI-NEXT: v_mul_f32_e32 v25, 1.0, v1
+; SI-NEXT: v_mul_f32_e32 v9, 1.0, v0
+; SI-NEXT: v_mul_f32_e64 v13, 1.0, s26
+; SI-NEXT: v_mul_f32_e64 v11, 1.0, s28
; SI-NEXT: s_cbranch_scc0 .LBB83_4
; SI-NEXT: ; %bb.1: ; %cmp.false
-; SI-NEXT: v_lshrrev_b32_e32 v23, 16, v31
-; SI-NEXT: v_lshrrev_b32_e32 v21, 16, v30
-; SI-NEXT: v_lshr_b64 v[0:1], v[22:23], 16
-; SI-NEXT: v_lshr_b64 v[1:2], v[20:21], 16
-; SI-NEXT: v_lshrrev_b32_e32 v19, 16, v29
-; SI-NEXT: v_lshr_b64 v[2:3], v[18:19], 16
-; SI-NEXT: v_lshrrev_b32_e32 v17, 16, v28
-; SI-NEXT: v_lshr_b64 v[3:4], v[16:17], 16
-; SI-NEXT: v_lshrrev_b32_e32 v15, 16, v27
-; SI-NEXT: v_lshr_b64 v[4:5], v[14:15], 16
-; SI-NEXT: v_lshrrev_b32_e32 v13, 16, v26
-; SI-NEXT: v_lshrrev_b32_e32 v9, 16, v24
-; SI-NEXT: v_lshr_b64 v[5:6], v[12:13], 16
-; SI-NEXT: v_lshrrev_b32_e32 v11, 16, v25
-; SI-NEXT: v_lshr_b64 v[32:33], v[8:9], 16
-; SI-NEXT: v_lshr_b64 v[6:7], v[10:11], 16
-; SI-NEXT: v_mov_b32_e32 v7, v32
+; SI-NEXT: v_lshrrev_b32_e32 v24, 16, v32
+; SI-NEXT: v_lshrrev_b32_e32 v22, 16, v31
+; SI-NEXT: v_lshr_b64 v[0:1], v[23:24], 16
+; SI-NEXT: v_lshr_b64 v[1:2], v[21:22], 16
+; SI-NEXT: v_lshrrev_b32_e32 v20, 16, v30
+; SI-NEXT: v_lshr_b64 v[2:3], v[19:20], 16
+; SI-NEXT: v_lshrrev_b32_e32 v18, 16, v29
+; SI-NEXT: v_lshr_b64 v[3:4], v[17:18], 16
+; SI-NEXT: v_lshrrev_b32_e32 v16, 16, v28
+; SI-NEXT: v_lshr_b64 v[4:5], v[15:16], 16
+; SI-NEXT: v_lshrrev_b32_e32 v14, 16, v27
+; SI-NEXT: v_lshr_b64 v[5:6], v[13:14], 16
+; SI-NEXT: v_lshrrev_b32_e32 v12, 16, v26
+; SI-NEXT: v_lshr_b64 v[6:7], v[11:12], 16
+; SI-NEXT: v_lshrrev_b32_e32 v10, 16, v25
+; SI-NEXT: v_lshr_b64 v[7:8], v[9:10], 16
; SI-NEXT: s_cbranch_execnz .LBB83_3
; SI-NEXT: .LBB83_2: ; %cmp.true
-; SI-NEXT: v_and_b32_e32 v1, 0xffff0000, v31
-; SI-NEXT: v_and_b32_e32 v0, 0xffff0000, v22
+; SI-NEXT: v_and_b32_e32 v1, 0xffff0000, v32
+; SI-NEXT: v_and_b32_e32 v0, 0xffff0000, v23
; SI-NEXT: v_add_f32_e32 v1, 0x40c00000, v1
-; SI-NEXT: v_and_b32_e32 v3, 0xffff0000, v30
+; SI-NEXT: v_and_b32_e32 v3, 0xffff0000, v31
; SI-NEXT: v_add_f32_e32 v0, 0x40c00000, v0
; SI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
-; SI-NEXT: v_and_b32_e32 v2, 0xffff0000, v20
+; SI-NEXT: v_and_b32_e32 v2, 0xffff0000, v21
; SI-NEXT: v_add_f32_e32 v3, 0x40c00000, v3
; SI-NEXT: v_add_f32_e32 v2, 0x40c00000, v2
; SI-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; SI-NEXT: v_lshr_b64 v[0:1], v[0:1], 16
; SI-NEXT: v_lshr_b64 v[1:2], v[2:3], 16
-; SI-NEXT: v_and_b32_e32 v3, 0xffff0000, v29
-; SI-NEXT: v_and_b32_e32 v2, 0xffff0000, v18
+; SI-NEXT: v_and_b32_e32 v3, 0xffff0000, v30
+; SI-NEXT: v_and_b32_e32 v2, 0xffff0000, v19
; SI-NEXT: v_add_f32_e32 v3, 0x40c00000, v3
; SI-NEXT: v_add_f32_e32 v2, 0x40c00000, v2
; SI-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; SI-NEXT: v_lshr_b64 v[2:3], v[2:3], 16
-; SI-NEXT: v_and_b32_e32 v4, 0xffff0000, v28
-; SI-NEXT: v_and_b32_e32 v3, 0xffff0000, v16
+; SI-NEXT: v_and_b32_e32 v4, 0xffff0000, v29
+; SI-NEXT: v_and_b32_e32 v3, 0xffff0000, v17
; SI-NEXT: v_add_f32_e32 v4, 0x40c00000, v4
; SI-NEXT: v_add_f32_e32 v3, 0x40c00000, v3
; SI-NEXT: v_lshrrev_b32_e32 v4, 16, v4
; SI-NEXT: v_lshr_b64 v[3:4], v[3:4], 16
-; SI-NEXT: v_and_b32_e32 v5, 0xffff0000, v27
-; SI-NEXT: v_and_b32_e32 v4, 0xffff0000, v14
+; SI-NEXT: v_and_b32_e32 v5, 0xffff0000, v28
+; SI-NEXT: v_and_b32_e32 v4, 0xffff0000, v15
; SI-NEXT: v_add_f32_e32 v5, 0x40c00000, v5
; SI-NEXT: v_add_f32_e32 v4, 0x40c00000, v4
; SI-NEXT: v_lshrrev_b32_e32 v5, 16, v5
; SI-NEXT: v_lshr_b64 v[4:5], v[4:5], 16
-; SI-NEXT: v_and_b32_e32 v6, 0xffff0000, v26
-; SI-NEXT: v_and_b32_e32 v5, 0xffff0000, v12
+; SI-NEXT: v_and_b32_e32 v6, 0xffff0000, v27
+; SI-NEXT: v_and_b32_e32 v5, 0xffff0000, v13
; SI-NEXT: v_add_f32_e32 v6, 0x40c00000, v6
; SI-NEXT: v_add_f32_e32 v5, 0x40c00000, v5
; SI-NEXT: v_lshrrev_b32_e32 v6, 16, v6
-; SI-NEXT: v_and_b32_e32 v9, 0xffff0000, v24
; SI-NEXT: v_lshr_b64 v[5:6], v[5:6], 16
-; SI-NEXT: v_and_b32_e32 v7, 0xffff0000, v25
-; SI-NEXT: v_and_b32_e32 v8, 0xffff0000, v8
-; SI-NEXT: v_add_f32_e32 v9, 0x40c00000, v9
-; SI-NEXT: v_and_b32_e32 v6, 0xffff0000, v10
+; SI-NEXT: v_and_b32_e32 v7, 0xffff0000, v26
+; SI-NEXT: v_and_b32_e32 v6, 0xffff0000, v11
; SI-NEXT: v_add_f32_e32 v7, 0x40c00000, v7
-; SI-NEXT: v_add_f32_e32 v8, 0x40c00000, v8
-; SI-NEXT: v_lshrrev_b32_e32 v9, 16, v9
; SI-NEXT: v_add_f32_e32 v6, 0x40c00000, v6
; SI-NEXT: v_lshrrev_b32_e32 v7, 16, v7
-; SI-NEXT: v_lshr_b64 v[8:9], v[8:9], 16
; SI-NEXT: v_lshr_b64 v[6:7], v[6:7], 16
-; SI-NEXT: v_mov_b32_e32 v7, v8
+; SI-NEXT: v_and_b32_e32 v8, 0xffff0000, v25
+; SI-NEXT: v_and_b32_e32 v7, 0xffff0000, v9
+; SI-NEXT: v_add_f32_e32 v8, 0x40c00000, v8
+; SI-NEXT: v_add_f32_e32 v7, 0x40c00000, v7
+; SI-NEXT: v_lshrrev_b32_e32 v8, 16, v8
+; SI-NEXT: v_lshr_b64 v[7:8], v[7:8], 16
; SI-NEXT: .LBB83_3: ; %end
; SI-NEXT: s_setpc_b64 s[30:31]
; SI-NEXT: .LBB83_4:
diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.512bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.512bit.ll
index a7f89579b5ce0..9c05297f7bcae 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.512bit.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.512bit.ll
@@ -6594,176 +6594,174 @@ define inreg <16 x i32> @bitcast_v32bf16_to_v16i32_scalar(<32 x bfloat> inreg %a
; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill
; SI-NEXT: s_and_b64 s[4:5], vcc, exec
-; SI-NEXT: s_waitcnt expcnt(3)
-; SI-NEXT: v_mul_f32_e64 v60, 1.0, s17
-; SI-NEXT: v_mul_f32_e64 v59, 1.0, s19
-; SI-NEXT: v_mul_f32_e32 v56, 1.0, v1
-; SI-NEXT: v_mul_f32_e32 v47, 1.0, v3
-; SI-NEXT: v_mul_f32_e32 v46, 1.0, v5
-; SI-NEXT: v_mul_f32_e32 v45, 1.0, v7
-; SI-NEXT: v_mul_f32_e32 v44, 1.0, v9
-; SI-NEXT: v_mul_f32_e32 v43, 1.0, v11
-; SI-NEXT: v_mul_f32_e32 v42, 1.0, v13
-; SI-NEXT: v_mul_f32_e32 v41, 1.0, v15
-; SI-NEXT: v_mul_f32_e32 v17, 1.0, v17
-; SI-NEXT: s_waitcnt expcnt(0)
-; SI-NEXT: v_mul_f32_e64 v63, 1.0, s21
-; SI-NEXT: v_mul_f32_e64 v62, 1.0, s23
+; SI-NEXT: s_waitcnt expcnt(1)
+; SI-NEXT: v_mul_f32_e64 v62, 1.0, s17
+; SI-NEXT: v_mul_f32_e64 v60, 1.0, s19
+; SI-NEXT: v_mul_f32_e32 v57, 1.0, v1
+; SI-NEXT: v_mul_f32_e32 v56, 1.0, v3
+; SI-NEXT: v_mul_f32_e32 v47, 1.0, v5
+; SI-NEXT: v_mul_f32_e32 v46, 1.0, v7
+; SI-NEXT: v_mul_f32_e32 v45, 1.0, v9
+; SI-NEXT: v_mul_f32_e32 v44, 1.0, v11
+; SI-NEXT: v_mul_f32_e32 v43, 1.0, v13
+; SI-NEXT: v_mul_f32_e32 v42, 1.0, v15
+; SI-NEXT: v_mul_f32_e32 v18, 1.0, v17
+; SI-NEXT: v_mul_f32_e64 v41, 1.0, s21
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_mul_f32_e64 v63, 1.0, s23
; SI-NEXT: v_mul_f32_e64 v61, 1.0, s25
-; SI-NEXT: v_mul_f32_e64 v58, 1.0, s27
-; SI-NEXT: v_mul_f32_e64 v57, 1.0, s29
-; SI-NEXT: v_mul_f32_e32 v32, 1.0, v0
-; SI-NEXT: v_mul_f32_e32 v30, 1.0, v2
-; SI-NEXT: v_mul_f32_e32 v28, 1.0, v4
-; SI-NEXT: v_mul_f32_e32 v26, 1.0, v6
-; SI-NEXT: v_mul_f32_e32 v24, 1.0, v8
-; SI-NEXT: v_mul_f32_e32 v22, 1.0, v10
-; SI-NEXT: v_mul_f32_e32 v20, 1.0, v12
-; SI-NEXT: v_mul_f32_e32 v18, 1.0, v14
-; SI-NEXT: v_mul_f32_e32 v16, 1.0, v16
-; SI-NEXT: v_mul_f32_e64 v54, 1.0, s16
-; SI-NEXT: v_mul_f32_e64 v52, 1.0, s18
-; SI-NEXT: v_mul_f32_e64 v50, 1.0, s20
-; SI-NEXT: v_mul_f32_e64 v48, 1.0, s22
-; SI-NEXT: v_mul_f32_e64 v38, 1.0, s24
-; SI-NEXT: v_mul_f32_e64 v36, 1.0, s26
-; SI-NEXT: v_mul_f32_e64 v34, 1.0, s28
+; SI-NEXT: v_mul_f32_e64 v59, 1.0, s27
+; SI-NEXT: v_mul_f32_e64 v58, 1.0, s29
+; SI-NEXT: v_mul_f32_e32 v33, 1.0, v0
+; SI-NEXT: v_mul_f32_e32 v31, 1.0, v2
+; SI-NEXT: v_mul_f32_e32 v29, 1.0, v4
+; SI-NEXT: v_mul_f32_e32 v27, 1.0, v6
+; SI-NEXT: v_mul_f32_e32 v25, 1.0, v8
+; SI-NEXT: v_mul_f32_e32 v23, 1.0, v10
+; SI-NEXT: v_mul_f32_e32 v21, 1.0, v12
+; SI-NEXT: v_mul_f32_e32 v19, 1.0, v14
+; SI-NEXT: v_mul_f32_e32 v17, 1.0, v16
+; SI-NEXT: v_mul_f32_e64 v39, 1.0, s16
+; SI-NEXT: v_mul_f32_e64 v54, 1.0, s18
+; SI-NEXT: v_mul_f32_e64 v52, 1.0, s20
+; SI-NEXT: v_mul_f32_e64 v50, 1.0, s22
+; SI-NEXT: v_mul_f32_e64 v48, 1.0, s24
+; SI-NEXT: v_mul_f32_e64 v37, 1.0, s26
+; SI-NEXT: v_mul_f32_e64 v35, 1.0, s28
; SI-NEXT: s_cbranch_scc0 .LBB23_4
; SI-NEXT: ; %bb.1: ; %cmp.false
+; SI-NEXT: v_lshrrev_b32_e32 v40, 16, v62
; SI-NEXT: v_lshrrev_b32_e32 v55, 16, v60
-; SI-NEXT: v_lshrrev_b32_e32 v53, 16, v59
-; SI-NEXT: v_lshr_b64 v[0:1], v[54:55], 16
-; SI-NEXT: v_lshr_b64 v[1:2], v[52:53], 16
+; SI-NEXT: v_lshr_b64 v[0:1], v[39:40], 16
+; SI-NEXT: v_lshr_b64 v[1:2], v[54:55], 16
+; SI-NEXT: v_lshrrev_b32_e32 v53, 16, v41
+; SI-NEXT: v_lshr_b64 v[2:3], v[52:53], 16
; SI-NEXT: v_lshrrev_b32_e32 v51, 16, v63
-; SI-NEXT: v_lshr_b64 v[2:3], v[50:51], 16
-; SI-NEXT: v_lshrrev_b32_e32 v49, 16, v62
-; SI-NEXT: v_lshr_b64 v[3:4], v[48:49], 16
-; SI-NEXT: v_lshrrev_b32_e32 v39, 16, v61
-; SI-NEXT: v_lshr_b64 v[4:5], v[38:39], 16
-; SI-NEXT: v_lshrrev_b32_e32 v37, 16, v58
-; SI-NEXT: v_lshr_b64 v[5:6], v[36:37], 16
-; SI-NEXT: v_lshrrev_b32_e32 v35, 16, v57
-; SI-NEXT: v_lshr_b64 v[6:7], v[34:35], 16
-; SI-NEXT: v_lshrrev_b32_e32 v33, 16, v56
-; SI-NEXT: v_lshr_b64 v[7:8], v[32:33], 16
-; SI-NEXT: v_lshrrev_b32_e32 v31, 16, v47
-; SI-NEXT: v_lshr_b64 v[8:9], v[30:31], 16
-; SI-NEXT: v_lshrrev_b32_e32 v29, 16, v46
-; SI-NEXT: v_lshr_b64 v[9:10], v[28:29], 16
-; SI-NEXT: v_lshrrev_b32_e32 v27, 16, v45
-; SI-NEXT: v_lshr_b64 v[10:11], v[26:27], 16
-; SI-NEXT: v_lshrrev_b32_e32 v25, 16, v44
-; SI-NEXT: v_lshr_b64 v[11:12], v[24:25], 16
-; SI-NEXT: v_lshrrev_b32_e32 v23, 16, v43
-; SI-NEXT: v_lshr_b64 v[12:13], v[22:23], 16
-; SI-NEXT: v_lshrrev_b32_e32 v21, 16, v42
-; SI-NEXT: v_lshr_b64 v[13:14], v[20:21], 16
-; SI-NEXT: v_lshrrev_b32_e32 v19, 16, v41
-; SI-NEXT: v_lshr_b64 v[14:15], v[18:19], 16
-; SI-NEXT: v_mov_b32_e32 v15, v17
-; SI-NEXT: v_lshrrev_b32_e32 v17, 16, v15
-; SI-NEXT: v_lshr_b64 v[39:40], v[16:17], 16
-; SI-NEXT: v_mov_b32_e32 v17, v15
-; SI-NEXT: v_mov_b32_e32 v15, v39
+; SI-NEXT: v_lshr_b64 v[3:4], v[50:51], 16
+; SI-NEXT: v_lshrrev_b32_e32 v49, 16, v61
+; SI-NEXT: v_lshr_b64 v[4:5], v[48:49], 16
+; SI-NEXT: v_lshrrev_b32_e32 v38, 16, v59
+; SI-NEXT: v_lshr_b64 v[5:6], v[37:38], 16
+; SI-NEXT: v_lshrrev_b32_e32 v36, 16, v58
+; SI-NEXT: v_lshr_b64 v[6:7], v[35:36], 16
+; SI-NEXT: v_lshrrev_b32_e32 v34, 16, v57
+; SI-NEXT: v_lshr_b64 v[7:8], v[33:34], 16
+; SI-NEXT: v_lshrrev_b32_e32 v32, 16, v56
+; SI-NEXT: v_lshr_b64 v[8:9], v[31:32], 16
+; SI-NEXT: v_lshrrev_b32_e32 v30, 16, v47
+; SI-NEXT: v_lshr_b64 v[9:10], v[29:30], 16
+; SI-NEXT: v_lshrrev_b32_e32 v28, 16, v46
+; SI-NEXT: v_lshr_b64 v[10:11], v[27:28], 16
+; SI-NEXT: v_lshrrev_b32_e32 v26, 16, v45
+; SI-NEXT: v_lshr_b64 v[11:12], v[25:26], 16
+; SI-NEXT: v_lshrrev_b32_e32 v24, 16, v44
+; SI-NEXT: v_lshr_b64 v[12:13], v[23:24], 16
+; SI-NEXT: v_lshrrev_b32_e32 v22, 16, v43
+; SI-NEXT: v_lshr_b64 v[13:14], v[21:22], 16
+; SI-NEXT: v_lshrrev_b32_e32 v20, 16, v42
+; SI-NEXT: v_lshr_b64 v[14:15], v[19:20], 16
+; SI-NEXT: v_mov_b32_e32 v20, v18
+; SI-NEXT: v_lshrrev_b32_e32 v18, 16, v20
+; SI-NEXT: v_lshr_b64 v[15:16], v[17:18], 16
+; SI-NEXT: v_mov_b32_e32 v18, v20
; SI-NEXT: s_cbranch_execnz .LBB23_3
; SI-NEXT: .LBB23_2: ; %cmp.true
-; SI-NEXT: v_and_b32_e32 v1, 0xffff0000, v60
-; SI-NEXT: v_and_b32_e32 v0, 0xffff0000, v54
+; SI-NEXT: v_and_b32_e32 v1, 0xffff0000, v62
+; SI-NEXT: v_and_b32_e32 v0, 0xffff0000, v39
; SI-NEXT: v_add_f32_e32 v1, 0x40c00000, v1
-; SI-NEXT: v_and_b32_e32 v3, 0xffff0000, v59
+; SI-NEXT: v_and_b32_e32 v3, 0xffff0000, v60
; SI-NEXT: v_add_f32_e32 v0, 0x40c00000, v0
; SI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
-; SI-NEXT: v_and_b32_e32 v2, 0xffff0000, v52
+; SI-NEXT: v_and_b32_e32 v2, 0xffff0000, v54
; SI-NEXT: v_add_f32_e32 v3, 0x40c00000, v3
; SI-NEXT: v_add_f32_e32 v2, 0x40c00000, v2
; SI-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; SI-NEXT: v_lshr_b64 v[0:1], v[0:1], 16
; SI-NEXT: v_lshr_b64 v[1:2], v[2:3], 16
-; SI-NEXT: v_and_b32_e32 v3, 0xffff0000, v63
-; SI-NEXT: v_and_b32_e32 v2, 0xffff0000, v50
+; SI-NEXT: v_and_b32_e32 v3, 0xffff0000, v41
+; SI-NEXT: v_and_b32_e32 v2, 0xffff0000, v52
; SI-NEXT: v_add_f32_e32 v3, 0x40c00000, v3
; SI-NEXT: v_add_f32_e32 v2, 0x40c00000, v2
; SI-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; SI-NEXT: v_lshr_b64 v[2:3], v[2:3], 16
-; SI-NEXT: v_and_b32_e32 v4, 0xffff0000, v62
-; SI-NEXT: v_and_b32_e32 v3, 0xffff0000, v48
+; SI-NEXT: v_and_b32_e32 v4, 0xffff0000, v63
+; SI-NEXT: v_and_b32_e32 v3, 0xffff0000, v50
; SI-NEXT: v_add_f32_e32 v4, 0x40c00000, v4
; SI-NEXT: v_add_f32_e32 v3, 0x40c00000, v3
; SI-NEXT: v_lshrrev_b32_e32 v4, 16, v4
; SI-NEXT: v_lshr_b64 v[3:4], v[3:4], 16
; SI-NEXT: v_and_b32_e32 v5, 0xffff0000, v61
-; SI-NEXT: v_and_b32_e32 v4, 0xffff0000, v38
+; SI-NEXT: v_and_b32_e32 v4, 0xffff0000, v48
; SI-NEXT: v_add_f32_e32 v5, 0x40c00000, v5
; SI-NEXT: v_add_f32_e32 v4, 0x40c00000, v4
; SI-NEXT: v_lshrrev_b32_e32 v5, 16, v5
; SI-NEXT: v_lshr_b64 v[4:5], v[4:5], 16
-; SI-NEXT: v_and_b32_e32 v6, 0xffff0000, v58
-; SI-NEXT: v_and_b32_e32 v5, 0xffff0000, v36
+; SI-NEXT: v_and_b32_e32 v6, 0xffff0000, v59
+; SI-NEXT: v_and_b32_e32 v5, 0xffff0000, v37
; SI-NEXT: v_add_f32_e32 v6, 0x40c00000, v6
; SI-NEXT: v_add_f32_e32 v5, 0x40c00000, v5
; SI-NEXT: v_lshrrev_b32_e32 v6, 16, v6
; SI-NEXT: v_lshr_b64 v[5:6], v[5:6], 16
-; SI-NEXT: v_and_b32_e32 v7, 0xffff0000, v57
-; SI-NEXT: v_and_b32_e32 v6, 0xffff0000, v34
+; SI-NEXT: v_and_b32_e32 v7, 0xffff0000, v58
+; SI-NEXT: v_and_b32_e32 v6, 0xffff0000, v35
; SI-NEXT: v_add_f32_e32 v7, 0x40c00000, v7
; SI-NEXT: v_add_f32_e32 v6, 0x40c00000, v6
; SI-NEXT: v_lshrrev_b32_e32 v7, 16, v7
; SI-NEXT: v_lshr_b64 v[6:7], v[6:7], 16
-; SI-NEXT: v_and_b32_e32 v8, 0xffff0000, v56
-; SI-NEXT: v_and_b32_e32 v7, 0xffff0000, v32
+; SI-NEXT: v_and_b32_e32 v8, 0xffff0000, v57
+; SI-NEXT: v_and_b32_e32 v7, 0xffff0000, v33
; SI-NEXT: v_add_f32_e32 v8, 0x40c00000, v8
; SI-NEXT: v_add_f32_e32 v7, 0x40c00000, v7
; SI-NEXT: v_lshrrev_b32_e32 v8, 16, v8
; SI-NEXT: v_lshr_b64 v[7:8], v[7:8], 16
-; SI-NEXT: v_and_b32_e32 v9, 0xffff0000, v47
-; SI-NEXT: v_and_b32_e32 v8, 0xffff0000, v30
+; SI-NEXT: v_and_b32_e32 v9, 0xffff0000, v56
+; SI-NEXT: v_and_b32_e32 v8, 0xffff0000, v31
; SI-NEXT: v_add_f32_e32 v9, 0x40c00000, v9
; SI-NEXT: v_add_f32_e32 v8, 0x40c00000, v8
; SI-NEXT: v_lshrrev_b32_e32 v9, 16, v9
; SI-NEXT: v_lshr_b64 v[8:9], v[8:9], 16
-; SI-NEXT: v_and_b32_e32 v10, 0xffff0000, v46
-; SI-NEXT: v_and_b32_e32 v9, 0xffff0000, v28
+; SI-NEXT: v_and_b32_e32 v10, 0xffff0000, v47
+; SI-NEXT: v_and_b32_e32 v9, 0xffff0000, v29
; SI-NEXT: v_add_f32_e32 v10, 0x40c00000, v10
; SI-NEXT: v_add_f32_e32 v9, 0x40c00000, v9
; SI-NEXT: v_lshrrev_b32_e32 v10, 16, v10
; SI-NEXT: v_lshr_b64 v[9:10], v[9:10], 16
-; SI-NEXT: v_and_b32_e32 v11, 0xffff0000, v45
-; SI-NEXT: v_and_b32_e32 v10, 0xffff0000, v26
+; SI-NEXT: v_and_b32_e32 v11, 0xffff0000, v46
+; SI-NEXT: v_and_b32_e32 v10, 0xffff0000, v27
; SI-NEXT: v_add_f32_e32 v11, 0x40c00000, v11
; SI-NEXT: v_add_f32_e32 v10, 0x40c00000, v10
; SI-NEXT: v_lshrrev_b32_e32 v11, 16, v11
; SI-NEXT: v_lshr_b64 v[10:11], v[10:11], 16
-; SI-NEXT: v_and_b32_e32 v12, 0xffff0000, v44
-; SI-NEXT: v_and_b32_e32 v11, 0xffff0000, v24
+; SI-NEXT: v_and_b32_e32 v12, 0xffff0000, v45
+; SI-NEXT: v_and_b32_e32 v11, 0xffff0000, v25
; SI-NEXT: v_add_f32_e32 v12, 0x40c00000, v12
; SI-NEXT: v_add_f32_e32 v11, 0x40c00000, v11
; SI-NEXT: v_lshrrev_b32_e32 v12, 16, v12
; SI-NEXT: v_lshr_b64 v[11:12], v[11:12], 16
-; SI-NEXT: v_and_b32_e32 v13, 0xffff0000, v43
-; SI-NEXT: v_and_b32_e32 v12, 0xffff0000, v22
+; SI-NEXT: v_and_b32_e32 v13, 0xffff0000, v44
+; SI-NEXT: v_and_b32_e32 v12, 0xffff0000, v23
; SI-NEXT: v_add_f32_e32 v13, 0x40c00000, v13
; SI-NEXT: v_add_f32_e32 v12, 0x40c00000, v12
; SI-NEXT: v_lshrrev_b32_e32 v13, 16, v13
; SI-NEXT: v_lshr_b64 v[12:13], v[12:13], 16
-; SI-NEXT: v_and_b32_e32 v14, 0xffff0000, v42
-; SI-NEXT: v_and_b32_e32 v13, 0xffff0000, v20
+; SI-NEXT: v_and_b32_e32 v14, 0xffff0000, v43
+; SI-NEXT: v_and_b32_e32 v13, 0xffff0000, v21
; SI-NEXT: v_add_f32_e32 v14, 0x40c00000, v14
; SI-NEXT: v_add_f32_e32 v13, 0x40c00000, v13
; SI-NEXT: v_lshrrev_b32_e32 v14, 16, v14
-; SI-NEXT: v_and_b32_e32 v17, 0xffff0000, v17
; SI-NEXT: v_lshr_b64 v[13:14], v[13:14], 16
-; SI-NEXT: v_and_b32_e32 v15, 0xffff0000, v41
-; SI-NEXT: v_and_b32_e32 v16, 0xffff0000, v16
-; SI-NEXT: v_add_f32_e32 v17, 0x40c00000, v17
-; SI-NEXT: v_and_b32_e32 v14, 0xffff0000, v18
+; SI-NEXT: v_and_b32_e32 v15, 0xffff0000, v42
+; SI-NEXT: v_and_b32_e32 v14, 0xffff0000, v19
; SI-NEXT: v_add_f32_e32 v15, 0x40c00000, v15
-; SI-NEXT: v_add_f32_e32 v16, 0x40c00000, v16
-; SI-NEXT: v_lshrrev_b32_e32 v17, 16, v17
; SI-NEXT: v_add_f32_e32 v14, 0x40c00000, v14
; SI-NEXT: v_lshrrev_b32_e32 v15, 16, v15
-; SI-NEXT: v_lshr_b64 v[16:17], v[16:17], 16
; SI-NEXT: v_lshr_b64 v[14:15], v[14:15], 16
-; SI-NEXT: v_mov_b32_e32 v15, v16
+; SI-NEXT: v_and_b32_e32 v16, 0xffff0000, v18
+; SI-NEXT: v_and_b32_e32 v15, 0xffff0000, v17
+; SI-NEXT: v_add_f32_e32 v16, 0x40c00000, v16
+; SI-NEXT: v_add_f32_e32 v15, 0x40c00000, v15
+; SI-NEXT: v_lshrrev_b32_e32 v16, 16, v16
+; SI-NEXT: v_lshr_b64 v[15:16], v[15:16], 16
; SI-NEXT: .LBB23_3: ; %end
; SI-NEXT: buffer_load_dword v63, off, s[0:3], s32 ; 4-byte Folded Reload
; SI-NEXT: buffer_load_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
@@ -21622,176 +21620,174 @@ define inreg <16 x float> @bitcast_v32bf16_to_v16f32_scalar(<32 x bfloat> inreg
; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill
; SI-NEXT: s_and_b64 s[4:5], vcc, exec
-; SI-NEXT: s_waitcnt expcnt(3)
-; SI-NEXT: v_mul_f32_e64 v60, 1.0, s17
-; SI-NEXT: v_mul_f32_e64 v59, 1.0, s19
-; SI-NEXT: v_mul_f32_e32 v56, 1.0, v1
-; SI-NEXT: v_mul_f32_e32 v47, 1.0, v3
-; SI-NEXT: v_mul_f32_e32 v46, 1.0, v5
-; SI-NEXT: v_mul_f32_e32 v45, 1.0, v7
-; SI-NEXT: v_mul_f32_e32 v44, 1.0, v9
-; SI-NEXT: v_mul_f32_e32 v43, 1.0, v11
-; SI-NEXT: v_mul_f32_e32 v42, 1.0, v13
-; SI-NEXT: v_mul_f32_e32 v41, 1.0, v15
-; SI-NEXT: v_mul_f32_e32 v17, 1.0, v17
-; SI-NEXT: s_waitcnt expcnt(0)
-; SI-NEXT: v_mul_f32_e64 v63, 1.0, s21
-; SI-NEXT: v_mul_f32_e64 v62, 1.0, s23
+; SI-NEXT: s_waitcnt expcnt(1)
+; SI-NEXT: v_mul_f32_e64 v62, 1.0, s17
+; SI-NEXT: v_mul_f32_e64 v60, 1.0, s19
+; SI-NEXT: v_mul_f32_e32 v57, 1.0, v1
+; SI-NEXT: v_mul_f32_e32 v56, 1.0, v3
+; SI-NEXT: v_mul_f32_e32 v47, 1.0, v5
+; SI-NEXT: v_mul_f32_e32 v46, 1.0, v7
+; SI-NEXT: v_mul_f32_e32 v45, 1.0, v9
+; SI-NEXT: v_mul_f32_e32 v44, 1.0, v11
+; SI-NEXT: v_mul_f32_e32 v43, 1.0, v13
+; SI-NEXT: v_mul_f32_e32 v42, 1.0, v15
+; SI-NEXT: v_mul_f32_e32 v18, 1.0, v17
+; SI-NEXT: v_mul_f32_e64 v41, 1.0, s21
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_mul_f32_e64 v63, 1.0, s23
; SI-NEXT: v_mul_f32_e64 v61, 1.0, s25
-; SI-NEXT: v_mul_f32_e64 v58, 1.0, s27
-; SI-NEXT: v_mul_f32_e64 v57, 1.0, s29
-; SI-NEXT: v_mul_f32_e32 v32, 1.0, v0
-; SI-NEXT: v_mul_f32_e32 v30, 1.0, v2
-; SI-NEXT: v_mul_f32_e32 v28, 1.0, v4
-; SI-NEXT: v_mul_f32_e32 v26, 1.0, v6
-; SI-NEXT: v_mul_f32_e32 v24, 1.0, v8
-; SI-NEXT: v_mul_f32_e32 v22, 1.0, v10
-; SI-NEXT: v_mul_f32_e32 v20, 1.0, v12
-; SI-NEXT: v_mul_f32_e32 v18, 1.0, v14
-; SI-NEXT: v_mul_f32_e32 v16, 1.0, v16
-; SI-NEXT: v_mul_f32_e64 v54, 1.0, s16
-; SI-NEXT: v_mul_f32_e64 v52, 1.0, s18
-; SI-NEXT: v_mul_f32_e64 v50, 1.0, s20
-; SI-NEXT: v_mul_f32_e64 v48, 1.0, s22
-; SI-NEXT: v_mul_f32_e64 v38, 1.0, s24
-; SI-NEXT: v_mul_f32_e64 v36, 1.0, s26
-; SI-NEXT: v_mul_f32_e64 v34, 1.0, s28
+; SI-NEXT: v_mul_f32_e64 v59, 1.0, s27
+; SI-NEXT: v_mul_f32_e64 v58, 1.0, s29
+; SI-NEXT: v_mul_f32_e32 v33, 1.0, v0
+; SI-NEXT: v_mul_f32_e32 v31, 1.0, v2
+; SI-NEXT: v_mul_f32_e32 v29, 1.0, v4
+; SI-NEXT: v_mul_f32_e32 v27, 1.0, v6
+; SI-NEXT: v_mul_f32_e32 v25, 1.0, v8
+; SI-NEXT: v_mul_f32_e32 v23, 1.0, v10
+; SI-NEXT: v_mul_f32_e32 v21, 1.0, v12
+; SI-NEXT: v_mul_f32_e32 v19, 1.0, v14
+; SI-NEXT: v_mul_f32_e32 v17, 1.0, v16
+; SI-NEXT: v_mul_f32_e64 v39, 1.0, s16
+; SI-NEXT: v_mul_f32_e64 v54, 1.0, s18
+; SI-NEXT: v_mul_f32_e64 v52, 1.0, s20
+; SI-NEXT: v_mul_f32_e64 v50, 1.0, s22
+; SI-NEXT: v_mul_f32_e64 v48, 1.0, s24
+; SI-NEXT: v_mul_f32_e64 v37, 1.0, s26
+; SI-NEXT: v_mul_f32_e64 v35, 1.0, s28
; SI-NEXT: s_cbranch_scc0 .LBB47_4
; SI-NEXT: ; %bb.1: ; %cmp.false
+; SI-NEXT: v_lshrrev_b32_e32 v40, 16, v62
; SI-NEXT: v_lshrrev_b32_e32 v55, 16, v60
-; SI-NEXT: v_lshrrev_b32_e32 v53, 16, v59
-; SI-NEXT: v_lshr_b64 v[0:1], v[54:55], 16
-; SI-NEXT: v_lshr_b64 v[1:2], v[52:53], 16
+; SI-NEXT: v_lshr_b64 v[0:1], v[39:40], 16
+; SI-NEXT: v_lshr_b64 v[1:2], v[54:55], 16
+; SI-NEXT: v_lshrrev_b32_e32 v53, 16, v41
+; SI-NEXT: v_lshr_b64 v[2:3], v[52:53], 16
; SI-NEXT: v_lshrrev_b32_e32 v51, 16, v63
-; SI-NEXT: v_lshr_b64 v[2:3], v[50:51], 16
-; SI-NEXT: v_lshrrev_b32_e32 v49, 16, v62
-; SI-NEXT: v_lshr_b64 v[3:4], v[48:49], 16
-; SI-NEXT: v_lshrrev_b32_e32 v39, 16, v61
-; SI-NEXT: v_lshr_b64 v[4:5], v[38:39], 16
-; SI-NEXT: v_lshrrev_b32_e32 v37, 16, v58
-; SI-NEXT: v_lshr_b64 v[5:6], v[36:37], 16
-; SI-NEXT: v_lshrrev_b32_e32 v35, 16, v57
-; SI-NEXT: v_lshr_b64 v[6:7], v[34:35], 16
-; SI-NEXT: v_lshrrev_b32_e32 v33, 16, v56
-; SI-NEXT: v_lshr_b64 v[7:8], v[32:33], 16
-; SI-NEXT: v_lshrrev_b32_e32 v31, 16, v47
-; SI-NEXT: v_lshr_b64 v[8:9], v[30:31], 16
-; SI-NEXT: v_lshrrev_b32_e32 v29, 16, v46
-; SI-NEXT: v_lshr_b64 v[9:10], v[28:29], 16
-; SI-NEXT: v_lshrrev_b32_e32 v27, 16, v45
-; SI-NEXT: v_lshr_b64 v[10:11], v[26:27], 16
-; SI-NEXT: v_lshrrev_b32_e32 v25, 16, v44
-; SI-NEXT: v_lshr_b64 v[11:12], v[24:25], 16
-; SI-NEXT: v_lshrrev_b32_e32 v23, 16, v43
-; SI-NEXT: v_lshr_b64 v[12:13], v[22:23], 16
-; SI-NEXT: v_lshrrev_b32_e32 v21, 16, v42
-; SI-NEXT: v_lshr_b64 v[13:14], v[20:21], 16
-; SI-NEXT: v_lshrrev_b32_e32 v19, 16, v41
-; SI-NEXT: v_lshr_b64 v[14:15], v[18:19], 16
-; SI-NEXT: v_mov_b32_e32 v15, v17
-; SI-NEXT: v_lshrrev_b32_e32 v17, 16, v15
-; SI-NEXT: v_lshr_b64 v[39:40], v[16:17], 16
-; SI-NEXT: v_mov_b32_e32 v17, v15
-; SI-NEXT: v_mov_b32_e32 v15, v39
+; SI-NEXT: v_lshr_b64 v[3:4], v[50:51], 16
+; SI-NEXT: v_lshrrev_b32_e32 v49, 16, v61
+; SI-NEXT: v_lshr_b64 v[4:5], v[48:49], 16
+; SI-NEXT: v_lshrrev_b32_e32 v38, 16, v59
+; SI-NEXT: v_lshr_b64 v[5:6], v[37:38], 16
+; SI-NEXT: v_lshrrev_b32_e32 v36, 16, v58
+; SI-NEXT: v_lshr_b64 v[6:7], v[35:36], 16
+; SI-NEXT: v_lshrrev_b32_e32 v34, 16, v57
+; SI-NEXT: v_lshr_b64 v[7:8], v[33:34], 16
+; SI-NEXT: v_lshrrev_b32_e32 v32, 16, v56
+; SI-NEXT: v_lshr_b64 v[8:9], v[31:32], 16
+; SI-NEXT: v_lshrrev_b32_e32 v30, 16, v47
+; SI-NEXT: v_lshr_b64 v[9:10], v[29:30], 16
+; SI-NEXT: v_lshrrev_b32_e32 v28, 16, v46
+; SI-NEXT: v_lshr_b64 v[10:11], v[27:28], 16
+; SI-NEXT: v_lshrrev_b32_e32 v26, 16, v45
+; SI-NEXT: v_lshr_b64 v[11:12], v[25:26], 16
+; SI-NEXT: v_lshrrev_b32_e32 v24, 16, v44
+; SI-NEXT: v_lshr_b64 v[12:13], v[23:24], 16
+; SI-NEXT: v_lshrrev_b32_e32 v22, 16, v43
+; SI-NEXT: v_lshr_b64 v[13:14], v[21:22], 16
+; SI-NEXT: v_lshrrev_b32_e32 v20, 16, v42
+; SI-NEXT: v_lshr_b64 v[14:15], v[19:20], 16
+; SI-NEXT: v_mov_b32_e32 v20, v18
+; SI-NEXT: v_lshrrev_b32_e32 v18, 16, v20
+; SI-NEXT: v_lshr_b64 v[15:16], v[17:18], 16
+; SI-NEXT: v_mov_b32_e32 v18, v20
; SI-NEXT: s_cbranch_execnz .LBB47_3
; SI-NEXT: .LBB47_2: ; %cmp.true
-; SI-NEXT: v_and_b32_e32 v1, 0xffff0000, v60
-; SI-NEXT: v_and_b32_e32 v0, 0xffff0000, v54
+; SI-NEXT: v_and_b32_e32 v1, 0xffff0000, v62
+; SI-NEXT: v_and_b32_e32 v0, 0xffff0000, v39
; SI-NEXT: v_add_f32_e32 v1, 0x40c00000, v1
-; SI-NEXT: v_and_b32_e32 v3, 0xffff0000, v59
+; SI-NEXT: v_and_b32_e32 v3, 0xffff0000, v60
; SI-NEXT: v_add_f32_e32 v0, 0x40c00000, v0
; SI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
-; SI-NEXT: v_and_b32_e32 v2, 0xffff0000, v52
+; SI-NEXT: v_and_b32_e32 v2, 0xffff0000, v54
; SI-NEXT: v_add_f32_e32 v3, 0x40c00000, v3
; SI-NEXT: v_add_f32_e32 v2, 0x40c00000, v2
; SI-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; SI-NEXT: v_lshr_b64 v[0:1], v[0:1], 16
; SI-NEXT: v_lshr_b64 v[1:2], v[2:3], 16
-; SI-NEXT: v_and_b32_e32 v3, 0xffff0000, v63
-; SI-NEXT: v_and_b32_e32 v2, 0xffff0000, v50
+; SI-NEXT: v_and_b32_e32 v3, 0xffff0000, v41
+; SI-NEXT: v_and_b32_e32 v2, 0xffff0000, v52
; SI-NEXT: v_add_f32_e32 v3, 0x40c00000, v3
; SI-NEXT: v_add_f32_e32 v2, 0x40c00000, v2
; SI-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; SI-NEXT: v_lshr_b64 v[2:3], v[2:3], 16
-; SI-NEXT: v_and_b32_e32 v4, 0xffff0000, v62
-; SI-NEXT: v_and_b32_e32 v3, 0xffff0000, v48
+; SI-NEXT: v_and_b32_e32 v4, 0xffff0000, v63
+; SI-NEXT: v_and_b32_e32 v3, 0xffff0000, v50
; SI-NEXT: v_add_f32_e32 v4, 0x40c00000, v4
; SI-NEXT: v_add_f32_e32 v3, 0x40c00000, v3
; SI-NEXT: v_lshrrev_b32_e32 v4, 16, v4
; SI-NEXT: v_lshr_b64 v[3:4], v[3:4], 16
; SI-NEXT: v_and_b32_e32 v5, 0xffff0000, v61
-; SI-NEXT: v_and_b32_e32 v4, 0xffff0000, v38
+; SI-NEXT: v_and_b32_e32 v4, 0xffff0000, v48
; SI-NEXT: v_add_f32_e32 v5, 0x40c00000, v5
; SI-NEXT: v_add_f32_e32 v4, 0x40c00000, v4
; SI-NEXT: v_lshrrev_b32_e32 v5, 16, v5
; SI-NEXT: v_lshr_b64 v[4:5], v[4:5], 16
-; SI-NEXT: v_and_b32_e32 v6, 0xffff0000, v58
-; SI-NEXT: v_and_b32_e32 v5, 0xffff0000, v36
+; SI-NEXT: v_and_b32_e32 v6, 0xffff0000, v59
+; SI-NEXT: v_and_b32_e32 v5, 0xffff0000, v37
; SI-NEXT: v_add_f32_e32 v6, 0x40c00000, v6
; SI-NEXT: v_add_f32_e32 v5, 0x40c00000, v5
; SI-NEXT: v_lshrrev_b32_e32 v6, 16, v6
; SI-NEXT: v_lshr_b64 v[5:6], v[5:6], 16
-; SI-NEXT: v_and_b32_e32 v7, 0xffff0000, v57
-; SI-NEXT: v_and_b32_e32 v6, 0xffff0000, v34
+; SI-NEXT: v_and_b32_e32 v7, 0xffff0000, v58
+; SI-NEXT: v_and_b32_e32 v6, 0xffff0000, v35
; SI-NEXT: v_add_f32_e32 v7, 0x40c00000, v7
; SI-NEXT: v_add_f32_e32 v6, 0x40c00000, v6
; SI-NEXT: v_lshrrev_b32_e32 v7, 16, v7
; SI-NEXT: v_lshr_b64 v[6:7], v[6:7], 16
-; SI-NEXT: v_and_b32_e32 v8, 0xffff0000, v56
-; SI-NEXT: v_and_b32_e32 v7, 0xffff0000, v32
+; SI-NEXT: v_and_b32_e32 v8, 0xffff0000, v57
+; SI-NEXT: v_and_b32_e32 v7, 0xffff0000, v33
; SI-NEXT: v_add_f32_e32 v8, 0x40c00000, v8
; SI-NEXT: v_add_f32_e32 v7, 0x40c00000, v7
; SI-NEXT: v_lshrrev_b32_e32 v8, 16, v8
; SI-NEXT: v_lshr_b64 v[7:8], v[7:8], 16
-; SI-NEXT: v_and_b32_e32 v9, 0xffff0000, v47
-; SI-NEXT: v_and_b32_e32 v8, 0xffff0000, v30
+; SI-NEXT: v_and_b32_e32 v9, 0xffff0000, v56
+; SI-NEXT: v_and_b32_e32 v8, 0xffff0000, v31
; SI-NEXT: v_add_f32_e32 v9, 0x40c00000, v9
; SI-NEXT: v_add_f32_e32 v8, 0x40c00000, v8
; SI-NEXT: v_lshrrev_b32_e32 v9, 16, v9
; SI-NEXT: v_lshr_b64 v[8:9], v[8:9], 16
-; SI-NEXT: v_and_b32_e32 v10, 0xffff0000, v46
-; SI-NEXT: v_and_b32_e32 v9, 0xffff0000, v28
+; SI-NEXT: v_and_b32_e32 v10, 0xffff0000, v47
+; SI-NEXT: v_and_b32_e32 v9, 0xffff0000, v29
; SI-NEXT: v_add_f32_e32 v10, 0x40c00000, v10
; SI-NEXT: v_add_f32_e32 v9, 0x40c00000, v9
; SI-NEXT: v_lshrrev_b32_e32 v10, 16, v10
; SI-NEXT: v_lshr_b64 v[9:10], v[9:10], 16
-; SI-NEXT: v_and_b32_e32 v11, 0xffff0000, v45
-; SI-NEXT: v_and_b32_e32 v10, 0xffff0000, v26
+; SI-NEXT: v_and_b32_e32 v11, 0xffff0000, v46
+; SI-NEXT: v_and_b32_e32 v10, 0xffff0000, v27
; SI-NEXT: v_add_f32_e32 v11, 0x40c00000, v11
; SI-NEXT: v_add_f32_e32 v10, 0x40c00000, v10
; SI-NEXT: v_lshrrev_b32_e32 v11, 16, v11
; SI-NEXT: v_lshr_b64 v[10:11], v[10:11], 16
-; SI-NEXT: v_and_b32_e32 v12, 0xffff0000, v44
-; SI-NEXT: v_and_b32_e32 v11, 0xffff0000, v24
+; SI-NEXT: v_and_b32_e32 v12, 0xffff0000, v45
+; SI-NEXT: v_and_b32_e32 v11, 0xffff0000, v25
; SI-NEXT: v_add_f32_e32 v12, 0x40c00000, v12
; SI-NEXT: v_add_f32_e32 v11, 0x40c00000, v11
; SI-NEXT: v_lshrrev_b32_e32 v12, 16, v12
; SI-NEXT: v_lshr_b64 v[11:12], v[11:12], 16
-; SI-NEXT: v_and_b32_e32 v13, 0xffff0000, v43
-; SI-NEXT: v_and_b32_e32 v12, 0xffff0000, v22
+; SI-NEXT: v_and_b32_e32 v13, 0xffff0000, v44
+; SI-NEXT: v_and_b32_e32 v12, 0xffff0000, v23
; SI-NEXT: v_add_f32_e32 v13, 0x40c00000, v13
; SI-NEXT: v_add_f32_e32 v12, 0x40c00000, v12
; SI-NEXT: v_lshrrev_b32_e32 v13, 16, v13
; SI-NEXT: v_lshr_b64 v[12:13], v[12:13], 16
-; SI-NEXT: v_and_b32_e32 v14, 0xffff0000, v42
-; SI-NEXT: v_and_b32_e32 v13, 0xffff0000, v20
+; SI-NEXT: v_and_b32_e32 v14, 0xffff0000, v43
+; SI-NEXT: v_and_b32_e32 v13, 0xffff0000, v21
; SI-NEXT: v_add_f32_e32 v14, 0x40c00000, v14
; SI-NEXT: v_add_f32_e32 v13, 0x40c00000, v13
; SI-NEXT: v_lshrrev_b32_e32 v14, 16, v14
-; SI-NEXT: v_and_b32_e32 v17, 0xffff0000, v17
; SI-NEXT: v_lshr_b64 v[13:14], v[13:14], 16
-; SI-NEXT: v_and_b32_e32 v15, 0xffff0000, v41
-; SI-NEXT: v_and_b32_e32 v16, 0xffff0000, v16
-; SI-NEXT: v_add_f32_e32 v17, 0x40c00000, v17
-; SI-NEXT: v_and_b32_e32 v14, 0xffff0000, v18
+; SI-NEXT: v_and_b32_e32 v15, 0xffff0000, v42
+; SI-NEXT: v_and_b32_e32 v14, 0xffff0000, v19
; SI-NEXT: v_add_f32_e32 v15, 0x40c00000, v15
-; SI-NEXT: v_add_f32_e32 v16, 0x40c00000, v16
-; SI-NEXT: v_lshrrev_b32_e32 v17, 16, v17
; SI-NEXT: v_add_f32_e32 v14, 0x40c00000, v14
; SI-NEXT: v_lshrrev_b32_e32 v15, 16, v15
-; SI-NEXT: v_lshr_b64 v[16:17], v[16:17], 16
; SI-NEXT: v_lshr_b64 v[14:15], v[14:15], 16
-; SI-NEXT: v_mov_b32_e32 v15, v16
+; SI-NEXT: v_and_b32_e32 v16, 0xffff0000, v18
+; SI-NEXT: v_and_b32_e32 v15, 0xffff0000, v17
+; SI-NEXT: v_add_f32_e32 v16, 0x40c00000, v16
+; SI-NEXT: v_add_f32_e32 v15, 0x40c00000, v15
+; SI-NEXT: v_lshrrev_b32_e32 v16, 16, v16
+; SI-NEXT: v_lshr_b64 v[15:16], v[15:16], 16
; SI-NEXT: .LBB47_3: ; %end
; SI-NEXT: buffer_load_dword v63, off, s[0:3], s32 ; 4-byte Folded Reload
; SI-NEXT: buffer_load_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
@@ -36198,176 +36194,174 @@ define inreg <8 x i64> @bitcast_v32bf16_to_v8i64_scalar(<32 x bfloat> inreg %a,
; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill
; SI-NEXT: s_and_b64 s[4:5], vcc, exec
-; SI-NEXT: s_waitcnt expcnt(3)
-; SI-NEXT: v_mul_f32_e64 v60, 1.0, s17
-; SI-NEXT: v_mul_f32_e64 v59, 1.0, s19
-; SI-NEXT: v_mul_f32_e32 v56, 1.0, v1
-; SI-NEXT: v_mul_f32_e32 v47, 1.0, v3
-; SI-NEXT: v_mul_f32_e32 v46, 1.0, v5
-; SI-NEXT: v_mul_f32_e32 v45, 1.0, v7
-; SI-NEXT: v_mul_f32_e32 v44, 1.0, v9
-; SI-NEXT: v_mul_f32_e32 v43, 1.0, v11
-; SI-NEXT: v_mul_f32_e32 v42, 1.0, v13
-; SI-NEXT: v_mul_f32_e32 v41, 1.0, v15
-; SI-NEXT: v_mul_f32_e32 v17, 1.0, v17
-; SI-NEXT: s_waitcnt expcnt(0)
-; SI-NEXT: v_mul_f32_e64 v63, 1.0, s21
-; SI-NEXT: v_mul_f32_e64 v62, 1.0, s23
+; SI-NEXT: s_waitcnt expcnt(1)
+; SI-NEXT: v_mul_f32_e64 v62, 1.0, s17
+; SI-NEXT: v_mul_f32_e64 v60, 1.0, s19
+; SI-NEXT: v_mul_f32_e32 v57, 1.0, v1
+; SI-NEXT: v_mul_f32_e32 v56, 1.0, v3
+; SI-NEXT: v_mul_f32_e32 v47, 1.0, v5
+; SI-NEXT: v_mul_f32_e32 v46, 1.0, v7
+; SI-NEXT: v_mul_f32_e32 v45, 1.0, v9
+; SI-NEXT: v_mul_f32_e32 v44, 1.0, v11
+; SI-NEXT: v_mul_f32_e32 v43, 1.0, v13
+; SI-NEXT: v_mul_f32_e32 v42, 1.0, v15
+; SI-NEXT: v_mul_f32_e32 v18, 1.0, v17
+; SI-NEXT: v_mul_f32_e64 v41, 1.0, s21
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_mul_f32_e64 v63, 1.0, s23
; SI-NEXT: v_mul_f32_e64 v61, 1.0, s25
-; SI-NEXT: v_mul_f32_e64 v58, 1.0, s27
-; SI-NEXT: v_mul_f32_e64 v57, 1.0, s29
-; SI-NEXT: v_mul_f32_e32 v32, 1.0, v0
-; SI-NEXT: v_mul_f32_e32 v30, 1.0, v2
-; SI-NEXT: v_mul_f32_e32 v28, 1.0, v4
-; SI-NEXT: v_mul_f32_e32 v26, 1.0, v6
-; SI-NEXT: v_mul_f32_e32 v24, 1.0, v8
-; SI-NEXT: v_mul_f32_e32 v22, 1.0, v10
-; SI-NEXT: v_mul_f32_e32 v20, 1.0, v12
-; SI-NEXT: v_mul_f32_e32 v18, 1.0, v14
-; SI-NEXT: v_mul_f32_e32 v16, 1.0, v16
-; SI-NEXT: v_mul_f32_e64 v54, 1.0, s16
-; SI-NEXT: v_mul_f32_e64 v52, 1.0, s18
-; SI-NEXT: v_mul_f32_e64 v50, 1.0, s20
-; SI-NEXT: v_mul_f32_e64 v48, 1.0, s22
-; SI-NEXT: v_mul_f32_e64 v38, 1.0, s24
-; SI-NEXT: v_mul_f32_e64 v36, 1.0, s26
-; SI-NEXT: v_mul_f32_e64 v34, 1.0, s28
+; SI-NEXT: v_mul_f32_e64 v59, 1.0, s27
+; SI-NEXT: v_mul_f32_e64 v58, 1.0, s29
+; SI-NEXT: v_mul_f32_e32 v33, 1.0, v0
+; SI-NEXT: v_mul_f32_e32 v31, 1.0, v2
+; SI-NEXT: v_mul_f32_e32 v29, 1.0, v4
+; SI-NEXT: v_mul_f32_e32 v27, 1.0, v6
+; SI-NEXT: v_mul_f32_e32 v25, 1.0, v8
+; SI-NEXT: v_mul_f32_e32 v23, 1.0, v10
+; SI-NEXT: v_mul_f32_e32 v21, 1.0, v12
+; SI-NEXT: v_mul_f32_e32 v19, 1.0, v14
+; SI-NEXT: v_mul_f32_e32 v17, 1.0, v16
+; SI-NEXT: v_mul_f32_e64 v39, 1.0, s16
+; SI-NEXT: v_mul_f32_e64 v54, 1.0, s18
+; SI-NEXT: v_mul_f32_e64 v52, 1.0, s20
+; SI-NEXT: v_mul_f32_e64 v50, 1.0, s22
+; SI-NEXT: v_mul_f32_e64 v48, 1.0, s24
+; SI-NEXT: v_mul_f32_e64 v37, 1.0, s26
+; SI-NEXT: v_mul_f32_e64 v35, 1.0, s28
; SI-NEXT: s_cbranch_scc0 .LBB67_4
; SI-NEXT: ; %bb.1: ; %cmp.false
+; SI-NEXT: v_lshrrev_b32_e32 v40, 16, v62
; SI-NEXT: v_lshrrev_b32_e32 v55, 16, v60
-; SI-NEXT: v_lshrrev_b32_e32 v53, 16, v59
-; SI-NEXT: v_lshr_b64 v[0:1], v[54:55], 16
-; SI-NEXT: v_lshr_b64 v[1:2], v[52:53], 16
+; SI-NEXT: v_lshr_b64 v[0:1], v[39:40], 16
+; SI-NEXT: v_lshr_b64 v[1:2], v[54:55], 16
+; SI-NEXT: v_lshrrev_b32_e32 v53, 16, v41
+; SI-NEXT: v_lshr_b64 v[2:3], v[52:53], 16
; SI-NEXT: v_lshrrev_b32_e32 v51, 16, v63
-; SI-NEXT: v_lshr_b64 v[2:3], v[50:51], 16
-; SI-NEXT: v_lshrrev_b32_e32 v49, 16, v62
-; SI-NEXT: v_lshr_b64 v[3:4], v[48:49], 16
-; SI-NEXT: v_lshrrev_b32_e32 v39, 16, v61
-; SI-NEXT: v_lshr_b64 v[4:5], v[38:39], 16
-; SI-NEXT: v_lshrrev_b32_e32 v37, 16, v58
-; SI-NEXT: v_lshr_b64 v[5:6], v[36:37], 16
-; SI-NEXT: v_lshrrev_b32_e32 v35, 16, v57
-; SI-NEXT: v_lshr_b64 v[6:7], v[34:35], 16
-; SI-NEXT: v_lshrrev_b32_e32 v33, 16, v56
-; SI-NEXT: v_lshr_b64 v[7:8], v[32:33], 16
-; SI-NEXT: v_lshrrev_b32_e32 v31, 16, v47
-; SI-NEXT: v_lshr_b64 v[8:9], v[30:31], 16
-; SI-NEXT: v_lshrrev_b32_e32 v29, 16, v46
-; SI-NEXT: v_lshr_b64 v[9:10], v[28:29], 16
-; SI-NEXT: v_lshrrev_b32_e32 v27, 16, v45
-; SI-NEXT: v_lshr_b64 v[10:11], v[26:27], 16
-; SI-NEXT: v_lshrrev_b32_e32 v25, 16, v44
-; SI-NEXT: v_lshr_b64 v[11:12], v[24:25], 16
-; SI-NEXT: v_lshrrev_b32_e32 v23, 16, v43
-; SI-NEXT: v_lshr_b64 v[12:13], v[22:23], 16
-; SI-NEXT: v_lshrrev_b32_e32 v21, 16, v42
-; SI-NEXT: v_lshr_b64 v[13:14], v[20:21], 16
-; SI-NEXT: v_lshrrev_b32_e32 v19, 16, v41
-; SI-NEXT: v_lshr_b64 v[14:15], v[18:19], 16
-; SI-NEXT: v_mov_b32_e32 v15, v17
-; SI-NEXT: v_lshrrev_b32_e32 v17, 16, v15
-; SI-NEXT: v_lshr_b64 v[39:40], v[16:17], 16
-; SI-NEXT: v_mov_b32_e32 v17, v15
-; SI-NEXT: v_mov_b32_e32 v15, v39
+; SI-NEXT: v_lshr_b64 v[3:4], v[50:51], 16
+; SI-NEXT: v_lshrrev_b32_e32 v49, 16, v61
+; SI-NEXT: v_lshr_b64 v[4:5], v[48:49], 16
+; SI-NEXT: v_lshrrev_b32_e32 v38, 16, v59
+; SI-NEXT: v_lshr_b64 v[5:6], v[37:38], 16
+; SI-NEXT: v_lshrrev_b32_e32 v36, 16, v58
+; SI-NEXT: v_lshr_b64 v[6:7], v[35:36], 16
+; SI-NEXT: v_lshrrev_b32_e32 v34, 16, v57
+; SI-NEXT: v_lshr_b64 v[7:8], v[33:34], 16
+; SI-NEXT: v_lshrrev_b32_e32 v32, 16, v56
+; SI-NEXT: v_lshr_b64 v[8:9], v[31:32], 16
+; SI-NEXT: v_lshrrev_b32_e32 v30, 16, v47
+; SI-NEXT: v_lshr_b64 v[9:10], v[29:30], 16
+; SI-NEXT: v_lshrrev_b32_e32 v28, 16, v46
+; SI-NEXT: v_lshr_b64 v[10:11], v[27:28], 16
+; SI-NEXT: v_lshrrev_b32_e32 v26, 16, v45
+; SI-NEXT: v_lshr_b64 v[11:12], v[25:26], 16
+; SI-NEXT: v_lshrrev_b32_e32 v24, 16, v44
+; SI-NEXT: v_lshr_b64 v[12:13], v[23:24], 16
+; SI-NEXT: v_lshrrev_b32_e32 v22, 16, v43
+; SI-NEXT: v_lshr_b64 v[13:14], v[21:22], 16
+; SI-NEXT: v_lshrrev_b32_e32 v20, 16, v42
+; SI-NEXT: v_lshr_b64 v[14:15], v[19:20], 16
+; SI-NEXT: v_mov_b32_e32 v20, v18
+; SI-NEXT: v_lshrrev_b32_e32 v18, 16, v20
+; SI-NEXT: v_lshr_b64 v[15:16], v[17:18], 16
+; SI-NEXT: v_mov_b32_e32 v18, v20
; SI-NEXT: s_cbranch_execnz .LBB67_3
; SI-NEXT: .LBB67_2: ; %cmp.true
-; SI-NEXT: v_and_b32_e32 v1, 0xffff0000, v60
-; SI-NEXT: v_and_b32_e32 v0, 0xffff0000, v54
+; SI-NEXT: v_and_b32_e32 v1, 0xffff0000, v62
+; SI-NEXT: v_and_b32_e32 v0, 0xffff0000, v39
; SI-NEXT: v_add_f32_e32 v1, 0x40c00000, v1
-; SI-NEXT: v_and_b32_e32 v3, 0xffff0000, v59
+; SI-NEXT: v_and_b32_e32 v3, 0xffff0000, v60
; SI-NEXT: v_add_f32_e32 v0, 0x40c00000, v0
; SI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
-; SI-NEXT: v_and_b32_e32 v2, 0xffff0000, v52
+; SI-NEXT: v_and_b32_e32 v2, 0xffff0000, v54
; SI-NEXT: v_add_f32_e32 v3, 0x40c00000, v3
; SI-NEXT: v_add_f32_e32 v2, 0x40c00000, v2
; SI-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; SI-NEXT: v_lshr_b64 v[0:1], v[0:1], 16
; SI-NEXT: v_lshr_b64 v[1:2], v[2:3], 16
-; SI-NEXT: v_and_b32_e32 v3, 0xffff0000, v63
-; SI-NEXT: v_and_b32_e32 v2, 0xffff0000, v50
+; SI-NEXT: v_and_b32_e32 v3, 0xffff0000, v41
+; SI-NEXT: v_and_b32_e32 v2, 0xffff0000, v52
; SI-NEXT: v_add_f32_e32 v3, 0x40c00000, v3
; SI-NEXT: v_add_f32_e32 v2, 0x40c00000, v2
; SI-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; SI-NEXT: v_lshr_b64 v[2:3], v[2:3], 16
-; SI-NEXT: v_and_b32_e32 v4, 0xffff0000, v62
-; SI-NEXT: v_and_b32_e32 v3, 0xffff0000, v48
+; SI-NEXT: v_and_b32_e32 v4, 0xffff0000, v63
+; SI-NEXT: v_and_b32_e32 v3, 0xffff0000, v50
; SI-NEXT: v_add_f32_e32 v4, 0x40c00000, v4
; SI-NEXT: v_add_f32_e32 v3, 0x40c00000, v3
; SI-NEXT: v_lshrrev_b32_e32 v4, 16, v4
; SI-NEXT: v_lshr_b64 v[3:4], v[3:4], 16
; SI-NEXT: v_and_b32_e32 v5, 0xffff0000, v61
-; SI-NEXT: v_and_b32_e32 v4, 0xffff0000, v38
+; SI-NEXT: v_and_b32_e32 v4, 0xffff0000, v48
; SI-NEXT: v_add_f32_e32 v5, 0x40c00000, v5
; SI-NEXT: v_add_f32_e32 v4, 0x40c00000, v4
; SI-NEXT: v_lshrrev_b32_e32 v5, 16, v5
; SI-NEXT: v_lshr_b64 v[4:5], v[4:5], 16
-; SI-NEXT: v_and_b32_e32 v6, 0xffff0000, v58
-; SI-NEXT: v_and_b32_e32 v5, 0xffff0000, v36
+; SI-NEXT: v_and_b32_e32 v6, 0xffff0000, v59
+; SI-NEXT: v_and_b32_e32 v5, 0xffff0000, v37
; SI-NEXT: v_add_f32_e32 v6, 0x40c00000, v6
; SI-NEXT: v_add_f32_e32 v5, 0x40c00000, v5
; SI-NEXT: v_lshrrev_b32_e32 v6, 16, v6
; SI-NEXT: v_lshr_b64 v[5:6], v[5:6], 16
-; SI-NEXT: v_and_b32_e32 v7, 0xffff0000, v57
-; SI-NEXT: v_and_b32_e32 v6, 0xffff0000, v34
+; SI-NEXT: v_and_b32_e32 v7, 0xffff0000, v58
+; SI-NEXT: v_and_b32_e32 v6, 0xffff0000, v35
; SI-NEXT: v_add_f32_e32 v7, 0x40c00000, v7
; SI-NEXT: v_add_f32_e32 v6, 0x40c00000, v6
; SI-NEXT: v_lshrrev_b32_e32 v7, 16, v7
; SI-NEXT: v_lshr_b64 v[6:7], v[6:7], 16
-; SI-NEXT: v_and_b32_e32 v8, 0xffff0000, v56
-; SI-NEXT: v_and_b32_e32 v7, 0xffff0000, v32
+; SI-NEXT: v_and_b32_e32 v8, 0xffff0000, v57
+; SI-NEXT: v_and_b32_e32 v7, 0xffff0000, v33
; SI-NEXT: v_add_f32_e32 v8, 0x40c00000, v8
; SI-NEXT: v_add_f32_e32 v7, 0x40c00000, v7
; SI-NEXT: v_lshrrev_b32_e32 v8, 16, v8
; SI-NEXT: v_lshr_b64 v[7:8], v[7:8], 16
-; SI-NEXT: v_and_b32_e32 v9, 0xffff0000, v47
-; SI-NEXT: v_and_b32_e32 v8, 0xffff0000, v30
+; SI-NEXT: v_and_b32_e32 v9, 0xffff0000, v56
+; SI-NEXT: v_and_b32_e32 v8, 0xffff0000, v31
; SI-NEXT: v_add_f32_e32 v9, 0x40c00000, v9
; SI-NEXT: v_add_f32_e32 v8, 0x40c00000, v8
; SI-NEXT: v_lshrrev_b32_e32 v9, 16, v9
; SI-NEXT: v_lshr_b64 v[8:9], v[8:9], 16
-; SI-NEXT: v_and_b32_e32 v10, 0xffff0000, v46
-; SI-NEXT: v_and_b32_e32 v9, 0xffff0000, v28
+; SI-NEXT: v_and_b32_e32 v10, 0xffff0000, v47
+; SI-NEXT: v_and_b32_e32 v9, 0xffff0000, v29
; SI-NEXT: v_add_f32_e32 v10, 0x40c00000, v10
; SI-NEXT: v_add_f32_e32 v9, 0x40c00000, v9
; SI-NEXT: v_lshrrev_b32_e32 v10, 16, v10
; SI-NEXT: v_lshr_b64 v[9:10], v[9:10], 16
-; SI-NEXT: v_and_b32_e32 v11, 0xffff0000, v45
-; SI-NEXT: v_and_b32_e32 v10, 0xffff0000, v26
+; SI-NEXT: v_and_b32_e32 v11, 0xffff0000, v46
+; SI-NEXT: v_and_b32_e32 v10, 0xffff0000, v27
; SI-NEXT: v_add_f32_e32 v11, 0x40c00000, v11
; SI-NEXT: v_add_f32_e32 v10, 0x40c00000, v10
; SI-NEXT: v_lshrrev_b32_e32 v11, 16, v11
; SI-NEXT: v_lshr_b64 v[10:11], v[10:11], 16
-; SI-NEXT: v_and_b32_e32 v12, 0xffff0000, v44
-; SI-NEXT: v_and_b32_e32 v11, 0xffff0000, v24
+; SI-NEXT: v_and_b32_e32 v12, 0xffff0000, v45
+; SI-NEXT: v_and_b32_e32 v11, 0xffff0000, v25
; SI-NEXT: v_add_f32_e32 v12, 0x40c00000, v12
; SI-NEXT: v_add_f32_e32 v11, 0x40c00000, v11
; SI-NEXT: v_lshrrev_b32_e32 v12, 16, v12
; SI-NEXT: v_lshr_b64 v[11:12], v[11:12], 16
-; SI-NEXT: v_and_b32_e32 v13, 0xffff0000, v43
-; SI-NEXT: v_and_b32_e32 v12, 0xffff0000, v22
+; SI-NEXT: v_and_b32_e32 v13, 0xffff0000, v44
+; SI-NEXT: v_and_b32_e32 v12, 0xffff0000, v23
; SI-NEXT: v_add_f32_e32 v13, 0x40c00000, v13
; SI-NEXT: v_add_f32_e32 v12, 0x40c00000, v12
; SI-NEXT: v_lshrrev_b32_e32 v13, 16, v13
; SI-NEXT: v_lshr_b64 v[12:13], v[12:13], 16
-; SI-NEXT: v_and_b32_e32 v14, 0xffff0000, v42
-; SI-NEXT: v_and_b32_e32 v13, 0xffff0000, v20
+; SI-NEXT: v_and_b32_e32 v14, 0xffff0000, v43
+; SI-NEXT: v_and_b32_e32 v13, 0xffff0000, v21
; SI-NEXT: v_add_f32_e32 v14, 0x40c00000, v14
; SI-NEXT: v_add_f32_e32 v13, 0x40c00000, v13
; SI-NEXT: v_lshrrev_b32_e32 v14, 16, v14
-; SI-NEXT: v_and_b32_e32 v17, 0xffff0000, v17
; SI-NEXT: v_lshr_b64 v[13:14], v[13:14], 16
-; SI-NEXT: v_and_b32_e32 v15, 0xffff0000, v41
-; SI-NEXT: v_and_b32_e32 v16, 0xffff0000, v16
-; SI-NEXT: v_add_f32_e32 v17, 0x40c00000, v17
-; SI-NEXT: v_and_b32_e32 v14, 0xffff0000, v18
+; SI-NEXT: v_and_b32_e32 v15, 0xffff0000, v42
+; SI-NEXT: v_and_b32_e32 v14, 0xffff0000, v19
; SI-NEXT: v_add_f32_e32 v15, 0x40c00000, v15
-; SI-NEXT: v_add_f32_e32 v16, 0x40c00000, v16
-; SI-NEXT: v_lshrrev_b32_e32 v17, 16, v17
; SI-NEXT: v_add_f32_e32 v14, 0x40c00000, v14
; SI-NEXT: v_lshrrev_b32_e32 v15, 16, v15
-; SI-NEXT: v_lshr_b64 v[16:17], v[16:17], 16
; SI-NEXT: v_lshr_b64 v[14:15], v[14:15], 16
-; SI-NEXT: v_mov_b32_e32 v15, v16
+; SI-NEXT: v_and_b32_e32 v16, 0xffff0000, v18
+; SI-NEXT: v_and_b32_e32 v15, 0xffff0000, v17
+; SI-NEXT: v_add_f32_e32 v16, 0x40c00000, v16
+; SI-NEXT: v_add_f32_e32 v15, 0x40c00000, v15
+; SI-NEXT: v_lshrrev_b32_e32 v16, 16, v16
+; SI-NEXT: v_lshr_b64 v[15:16], v[15:16], 16
; SI-NEXT: .LBB67_3: ; %end
; SI-NEXT: buffer_load_dword v63, off, s[0:3], s32 ; 4-byte Folded Reload
; SI-NEXT: buffer_load_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
@@ -49794,176 +49788,174 @@ define inreg <8 x double> @bitcast_v32bf16_to_v8f64_scalar(<32 x bfloat> inreg %
; SI-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; SI-NEXT: buffer_store_dword v63, off, s[0:3], s32 ; 4-byte Folded Spill
; SI-NEXT: s_and_b64 s[4:5], vcc, exec
-; SI-NEXT: s_waitcnt expcnt(3)
-; SI-NEXT: v_mul_f32_e64 v60, 1.0, s17
-; SI-NEXT: v_mul_f32_e64 v59, 1.0, s19
-; SI-NEXT: v_mul_f32_e32 v56, 1.0, v1
-; SI-NEXT: v_mul_f32_e32 v47, 1.0, v3
-; SI-NEXT: v_mul_f32_e32 v46, 1.0, v5
-; SI-NEXT: v_mul_f32_e32 v45, 1.0, v7
-; SI-NEXT: v_mul_f32_e32 v44, 1.0, v9
-; SI-NEXT: v_mul_f32_e32 v43, 1.0, v11
-; SI-NEXT: v_mul_f32_e32 v42, 1.0, v13
-; SI-NEXT: v_mul_f32_e32 v41, 1.0, v15
-; SI-NEXT: v_mul_f32_e32 v17, 1.0, v17
-; SI-NEXT: s_waitcnt expcnt(0)
-; SI-NEXT: v_mul_f32_e64 v63, 1.0, s21
-; SI-NEXT: v_mul_f32_e64 v62, 1.0, s23
+; SI-NEXT: s_waitcnt expcnt(1)
+; SI-NEXT: v_mul_f32_e64 v62, 1.0, s17
+; SI-NEXT: v_mul_f32_e64 v60, 1.0, s19
+; SI-NEXT: v_mul_f32_e32 v57, 1.0, v1
+; SI-NEXT: v_mul_f32_e32 v56, 1.0, v3
+; SI-NEXT: v_mul_f32_e32 v47, 1.0, v5
+; SI-NEXT: v_mul_f32_e32 v46, 1.0, v7
+; SI-NEXT: v_mul_f32_e32 v45, 1.0, v9
+; SI-NEXT: v_mul_f32_e32 v44, 1.0, v11
+; SI-NEXT: v_mul_f32_e32 v43, 1.0, v13
+; SI-NEXT: v_mul_f32_e32 v42, 1.0, v15
+; SI-NEXT: v_mul_f32_e32 v18, 1.0, v17
+; SI-NEXT: v_mul_f32_e64 v41, 1.0, s21
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_mul_f32_e64 v63, 1.0, s23
; SI-NEXT: v_mul_f32_e64 v61, 1.0, s25
-; SI-NEXT: v_mul_f32_e64 v58, 1.0, s27
-; SI-NEXT: v_mul_f32_e64 v57, 1.0, s29
-; SI-NEXT: v_mul_f32_e32 v32, 1.0, v0
-; SI-NEXT: v_mul_f32_e32 v30, 1.0, v2
-; SI-NEXT: v_mul_f32_e32 v28, 1.0, v4
-; SI-NEXT: v_mul_f32_e32 v26, 1.0, v6
-; SI-NEXT: v_mul_f32_e32 v24, 1.0, v8
-; SI-NEXT: v_mul_f32_e32 v22, 1.0, v10
-; SI-NEXT: v_mul_f32_e32 v20, 1.0, v12
-; SI-NEXT: v_mul_f32_e32 v18, 1.0, v14
-; SI-NEXT: v_mul_f32_e32 v16, 1.0, v16
-; SI-NEXT: v_mul_f32_e64 v54, 1.0, s16
-; SI-NEXT: v_mul_f32_e64 v52, 1.0, s18
-; SI-NEXT: v_mul_f32_e64 v50, 1.0, s20
-; SI-NEXT: v_mul_f32_e64 v48, 1.0, s22
-; SI-NEXT: v_mul_f32_e64 v38, 1.0, s24
-; SI-NEXT: v_mul_f32_e64 v36, 1.0, s26
-; SI-NEXT: v_mul_f32_e64 v34, 1.0, s28
+; SI-NEXT: v_mul_f32_e64 v59, 1.0, s27
+; SI-NEXT: v_mul_f32_e64 v58, 1.0, s29
+; SI-NEXT: v_mul_f32_e32 v33, 1.0, v0
+; SI-NEXT: v_mul_f32_e32 v31, 1.0, v2
+; SI-NEXT: v_mul_f32_e32 v29, 1.0, v4
+; SI-NEXT: v_mul_f32_e32 v27, 1.0, v6
+; SI-NEXT: v_mul_f32_e32 v25, 1.0, v8
+; SI-NEXT: v_mul_f32_e32 v23, 1.0, v10
+; SI-NEXT: v_mul_f32_e32 v21, 1.0, v12
+; SI-NEXT: v_mul_f32_e32 v19, 1.0, v14
+; SI-NEXT: v_mul_f32_e32 v17, 1.0, v16
+; SI-NEXT: v_mul_f32_e64 v39, 1.0, s16
+; SI-NEXT: v_mul_f32_e64 v54, 1.0, s18
+; SI-NEXT: v_mul_f32_e64 v52, 1.0, s20
+; SI-NEXT: v_mul_f32_e64 v50, 1.0, s22
+; SI-NEXT: v_mul_f32_e64 v48, 1.0, s24
+; SI-NEXT: v_mul_f32_e64 v37, 1.0, s26
+; SI-NEXT: v_mul_f32_e64 v35, 1.0, s28
; SI-NEXT: s_cbranch_scc0 .LBB83_4
; SI-NEXT: ; %bb.1: ; %cmp.false
+; SI-NEXT: v_lshrrev_b32_e32 v40, 16, v62
; SI-NEXT: v_lshrrev_b32_e32 v55, 16, v60
-; SI-NEXT: v_lshrrev_b32_e32 v53, 16, v59
-; SI-NEXT: v_lshr_b64 v[0:1], v[54:55], 16
-; SI-NEXT: v_lshr_b64 v[1:2], v[52:53], 16
+; SI-NEXT: v_lshr_b64 v[0:1], v[39:40], 16
+; SI-NEXT: v_lshr_b64 v[1:2], v[54:55], 16
+; SI-NEXT: v_lshrrev_b32_e32 v53, 16, v41
+; SI-NEXT: v_lshr_b64 v[2:3], v[52:53], 16
; SI-NEXT: v_lshrrev_b32_e32 v51, 16, v63
-; SI-NEXT: v_lshr_b64 v[2:3], v[50:51], 16
-; SI-NEXT: v_lshrrev_b32_e32 v49, 16, v62
-; SI-NEXT: v_lshr_b64 v[3:4], v[48:49], 16
-; SI-NEXT: v_lshrrev_b32_e32 v39, 16, v61
-; SI-NEXT: v_lshr_b64 v[4:5], v[38:39], 16
-; SI-NEXT: v_lshrrev_b32_e32 v37, 16, v58
-; SI-NEXT: v_lshr_b64 v[5:6], v[36:37], 16
-; SI-NEXT: v_lshrrev_b32_e32 v35, 16, v57
-; SI-NEXT: v_lshr_b64 v[6:7], v[34:35], 16
-; SI-NEXT: v_lshrrev_b32_e32 v33, 16, v56
-; SI-NEXT: v_lshr_b64 v[7:8], v[32:33], 16
-; SI-NEXT: v_lshrrev_b32_e32 v31, 16, v47
-; SI-NEXT: v_lshr_b64 v[8:9], v[30:31], 16
-; SI-NEXT: v_lshrrev_b32_e32 v29, 16, v46
-; SI-NEXT: v_lshr_b64 v[9:10], v[28:29], 16
-; SI-NEXT: v_lshrrev_b32_e32 v27, 16, v45
-; SI-NEXT: v_lshr_b64 v[10:11], v[26:27], 16
-; SI-NEXT: v_lshrrev_b32_e32 v25, 16, v44
-; SI-NEXT: v_lshr_b64 v[11:12], v[24:25], 16
-; SI-NEXT: v_lshrrev_b32_e32 v23, 16, v43
-; SI-NEXT: v_lshr_b64 v[12:13], v[22:23], 16
-; SI-NEXT: v_lshrrev_b32_e32 v21, 16, v42
-; SI-NEXT: v_lshr_b64 v[13:14], v[20:21], 16
-; SI-NEXT: v_lshrrev_b32_e32 v19, 16, v41
-; SI-NEXT: v_lshr_b64 v[14:15], v[18:19], 16
-; SI-NEXT: v_mov_b32_e32 v15, v17
-; SI-NEXT: v_lshrrev_b32_e32 v17, 16, v15
-; SI-NEXT: v_lshr_b64 v[39:40], v[16:17], 16
-; SI-NEXT: v_mov_b32_e32 v17, v15
-; SI-NEXT: v_mov_b32_e32 v15, v39
+; SI-NEXT: v_lshr_b64 v[3:4], v[50:51], 16
+; SI-NEXT: v_lshrrev_b32_e32 v49, 16, v61
+; SI-NEXT: v_lshr_b64 v[4:5], v[48:49], 16
+; SI-NEXT: v_lshrrev_b32_e32 v38, 16, v59
+; SI-NEXT: v_lshr_b64 v[5:6], v[37:38], 16
+; SI-NEXT: v_lshrrev_b32_e32 v36, 16, v58
+; SI-NEXT: v_lshr_b64 v[6:7], v[35:36], 16
+; SI-NEXT: v_lshrrev_b32_e32 v34, 16, v57
+; SI-NEXT: v_lshr_b64 v[7:8], v[33:34], 16
+; SI-NEXT: v_lshrrev_b32_e32 v32, 16, v56
+; SI-NEXT: v_lshr_b64 v[8:9], v[31:32], 16
+; SI-NEXT: v_lshrrev_b32_e32 v30, 16, v47
+; SI-NEXT: v_lshr_b64 v[9:10], v[29:30], 16
+; SI-NEXT: v_lshrrev_b32_e32 v28, 16, v46
+; SI-NEXT: v_lshr_b64 v[10:11], v[27:28], 16
+; SI-NEXT: v_lshrrev_b32_e32 v26, 16, v45
+; SI-NEXT: v_lshr_b64 v[11:12], v[25:26], 16
+; SI-NEXT: v_lshrrev_b32_e32 v24, 16, v44
+; SI-NEXT: v_lshr_b64 v[12:13], v[23:24], 16
+; SI-NEXT: v_lshrrev_b32_e32 v22, 16, v43
+; SI-NEXT: v_lshr_b64 v[13:14], v[21:22], 16
+; SI-NEXT: v_lshrrev_b32_e32 v20, 16, v42
+; SI-NEXT: v_lshr_b64 v[14:15], v[19:20], 16
+; SI-NEXT: v_mov_b32_e32 v20, v18
+; SI-NEXT: v_lshrrev_b32_e32 v18, 16, v20
+; SI-NEXT: v_lshr_b64 v[15:16], v[17:18], 16
+; SI-NEXT: v_mov_b32_e32 v18, v20
; SI-NEXT: s_cbranch_execnz .LBB83_3
; SI-NEXT: .LBB83_2: ; %cmp.true
-; SI-NEXT: v_and_b32_e32 v1, 0xffff0000, v60
-; SI-NEXT: v_and_b32_e32 v0, 0xffff0000, v54
+; SI-NEXT: v_and_b32_e32 v1, 0xffff0000, v62
+; SI-NEXT: v_and_b32_e32 v0, 0xffff0000, v39
; SI-NEXT: v_add_f32_e32 v1, 0x40c00000, v1
-; SI-NEXT: v_and_b32_e32 v3, 0xffff0000, v59
+; SI-NEXT: v_and_b32_e32 v3, 0xffff0000, v60
; SI-NEXT: v_add_f32_e32 v0, 0x40c00000, v0
; SI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
-; SI-NEXT: v_and_b32_e32 v2, 0xffff0000, v52
+; SI-NEXT: v_and_b32_e32 v2, 0xffff0000, v54
; SI-NEXT: v_add_f32_e32 v3, 0x40c00000, v3
; SI-NEXT: v_add_f32_e32 v2, 0x40c00000, v2
; SI-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; SI-NEXT: v_lshr_b64 v[0:1], v[0:1], 16
; SI-NEXT: v_lshr_b64 v[1:2], v[2:3], 16
-; SI-NEXT: v_and_b32_e32 v3, 0xffff0000, v63
-; SI-NEXT: v_and_b32_e32 v2, 0xffff0000, v50
+; SI-NEXT: v_and_b32_e32 v3, 0xffff0000, v41
+; SI-NEXT: v_and_b32_e32 v2, 0xffff0000, v52
; SI-NEXT: v_add_f32_e32 v3, 0x40c00000, v3
; SI-NEXT: v_add_f32_e32 v2, 0x40c00000, v2
; SI-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; SI-NEXT: v_lshr_b64 v[2:3], v[2:3], 16
-; SI-NEXT: v_and_b32_e32 v4, 0xffff0000, v62
-; SI-NEXT: v_and_b32_e32 v3, 0xffff0000, v48
+; SI-NEXT: v_and_b32_e32 v4, 0xffff0000, v63
+; SI-NEXT: v_and_b32_e32 v3, 0xffff0000, v50
; SI-NEXT: v_add_f32_e32 v4, 0x40c00000, v4
; SI-NEXT: v_add_f32_e32 v3, 0x40c00000, v3
; SI-NEXT: v_lshrrev_b32_e32 v4, 16, v4
; SI-NEXT: v_lshr_b64 v[3:4], v[3:4], 16
; SI-NEXT: v_and_b32_e32 v5, 0xffff0000, v61
-; SI-NEXT: v_and_b32_e32 v4, 0xffff0000, v38
+; SI-NEXT: v_and_b32_e32 v4, 0xffff0000, v48
; SI-NEXT: v_add_f32_e32 v5, 0x40c00000, v5
; SI-NEXT: v_add_f32_e32 v4, 0x40c00000, v4
; SI-NEXT: v_lshrrev_b32_e32 v5, 16, v5
; SI-NEXT: v_lshr_b64 v[4:5], v[4:5], 16
-; SI-NEXT: v_and_b32_e32 v6, 0xffff0000, v58
-; SI-NEXT: v_and_b32_e32 v5, 0xffff0000, v36
+; SI-NEXT: v_and_b32_e32 v6, 0xffff0000, v59
+; SI-NEXT: v_and_b32_e32 v5, 0xffff0000, v37
; SI-NEXT: v_add_f32_e32 v6, 0x40c00000, v6
; SI-NEXT: v_add_f32_e32 v5, 0x40c00000, v5
; SI-NEXT: v_lshrrev_b32_e32 v6, 16, v6
; SI-NEXT: v_lshr_b64 v[5:6], v[5:6], 16
-; SI-NEXT: v_and_b32_e32 v7, 0xffff0000, v57
-; SI-NEXT: v_and_b32_e32 v6, 0xffff0000, v34
+; SI-NEXT: v_and_b32_e32 v7, 0xffff0000, v58
+; SI-NEXT: v_and_b32_e32 v6, 0xffff0000, v35
; SI-NEXT: v_add_f32_e32 v7, 0x40c00000, v7
; SI-NEXT: v_add_f32_e32 v6, 0x40c00000, v6
; SI-NEXT: v_lshrrev_b32_e32 v7, 16, v7
; SI-NEXT: v_lshr_b64 v[6:7], v[6:7], 16
-; SI-NEXT: v_and_b32_e32 v8, 0xffff0000, v56
-; SI-NEXT: v_and_b32_e32 v7, 0xffff0000, v32
+; SI-NEXT: v_and_b32_e32 v8, 0xffff0000, v57
+; SI-NEXT: v_and_b32_e32 v7, 0xffff0000, v33
; SI-NEXT: v_add_f32_e32 v8, 0x40c00000, v8
; SI-NEXT: v_add_f32_e32 v7, 0x40c00000, v7
; SI-NEXT: v_lshrrev_b32_e32 v8, 16, v8
; SI-NEXT: v_lshr_b64 v[7:8], v[7:8], 16
-; SI-NEXT: v_and_b32_e32 v9, 0xffff0000, v47
-; SI-NEXT: v_and_b32_e32 v8, 0xffff0000, v30
+; SI-NEXT: v_and_b32_e32 v9, 0xffff0000, v56
+; SI-NEXT: v_and_b32_e32 v8, 0xffff0000, v31
; SI-NEXT: v_add_f32_e32 v9, 0x40c00000, v9
; SI-NEXT: v_add_f32_e32 v8, 0x40c00000, v8
; SI-NEXT: v_lshrrev_b32_e32 v9, 16, v9
; SI-NEXT: v_lshr_b64 v[8:9], v[8:9], 16
-; SI-NEXT: v_and_b32_e32 v10, 0xffff0000, v46
-; SI-NEXT: v_and_b32_e32 v9, 0xffff0000, v28
+; SI-NEXT: v_and_b32_e32 v10, 0xffff0000, v47
+; SI-NEXT: v_and_b32_e32 v9, 0xffff0000, v29
; SI-NEXT: v_add_f32_e32 v10, 0x40c00000, v10
; SI-NEXT: v_add_f32_e32 v9, 0x40c00000, v9
; SI-NEXT: v_lshrrev_b32_e32 v10, 16, v10
; SI-NEXT: v_lshr_b64 v[9:10], v[9:10], 16
-; SI-NEXT: v_and_b32_e32 v11, 0xffff0000, v45
-; SI-NEXT: v_and_b32_e32 v10, 0xffff0000, v26
+; SI-NEXT: v_and_b32_e32 v11, 0xffff0000, v46
+; SI-NEXT: v_and_b32_e32 v10, 0xffff0000, v27
; SI-NEXT: v_add_f32_e32 v11, 0x40c00000, v11
; SI-NEXT: v_add_f32_e32 v10, 0x40c00000, v10
; SI-NEXT: v_lshrrev_b32_e32 v11, 16, v11
; SI-NEXT: v_lshr_b64 v[10:11], v[10:11], 16
-; SI-NEXT: v_and_b32_e32 v12, 0xffff0000, v44
-; SI-NEXT: v_and_b32_e32 v11, 0xffff0000, v24
+; SI-NEXT: v_and_b32_e32 v12, 0xffff0000, v45
+; SI-NEXT: v_and_b32_e32 v11, 0xffff0000, v25
; SI-NEXT: v_add_f32_e32 v12, 0x40c00000, v12
; SI-NEXT: v_add_f32_e32 v11, 0x40c00000, v11
; SI-NEXT: v_lshrrev_b32_e32 v12, 16, v12
; SI-NEXT: v_lshr_b64 v[11:12], v[11:12], 16
-; SI-NEXT: v_and_b32_e32 v13, 0xffff0000, v43
-; SI-NEXT: v_and_b32_e32 v12, 0xffff0000, v22
+; SI-NEXT: v_and_b32_e32 v13, 0xffff0000, v44
+; SI-NEXT: v_and_b32_e32 v12, 0xffff0000, v23
; SI-NEXT: v_add_f32_e32 v13, 0x40c00000, v13
; SI-NEXT: v_add_f32_e32 v12, 0x40c00000, v12
; SI-NEXT: v_lshrrev_b32_e32 v13, 16, v13
; SI-NEXT: v_lshr_b64 v[12:13], v[12:13], 16
-; SI-NEXT: v_and_b32_e32 v14, 0xffff0000, v42
-; SI-NEXT: v_and_b32_e32 v13, 0xffff0000, v20
+; SI-NEXT: v_and_b32_e32 v14, 0xffff0000, v43
+; SI-NEXT: v_and_b32_e32 v13, 0xffff0000, v21
; SI-NEXT: v_add_f32_e32 v14, 0x40c00000, v14
; SI-NEXT: v_add_f32_e32 v13, 0x40c00000, v13
; SI-NEXT: v_lshrrev_b32_e32 v14, 16, v14
-; SI-NEXT: v_and_b32_e32 v17, 0xffff0000, v17
; SI-NEXT: v_lshr_b64 v[13:14], v[13:14], 16
-; SI-NEXT: v_and_b32_e32 v15, 0xffff0000, v41
-; SI-NEXT: v_and_b32_e32 v16, 0xffff0000, v16
-; SI-NEXT: v_add_f32_e32 v17, 0x40c00000, v17
-; SI-NEXT: v_and_b32_e32 v14, 0xffff0000, v18
+; SI-NEXT: v_and_b32_e32 v15, 0xffff0000, v42
+; SI-NEXT: v_and_b32_e32 v14, 0xffff0000, v19
; SI-NEXT: v_add_f32_e32 v15, 0x40c00000, v15
-; SI-NEXT: v_add_f32_e32 v16, 0x40c00000, v16
-; SI-NEXT: v_lshrrev_b32_e32 v17, 16, v17
; SI-NEXT: v_add_f32_e32 v14, 0x40c00000, v14
; SI-NEXT: v_lshrrev_b32_e32 v15, 16, v15
-; SI-NEXT: v_lshr_b64 v[16:17], v[16:17], 16
; SI-NEXT: v_lshr_b64 v[14:15], v[14:15], 16
-; SI-NEXT: v_mov_b32_e32 v15, v16
+; SI-NEXT: v_and_b32_e32 v16, 0xffff0000, v18
+; SI-NEXT: v_and_b32_e32 v15, 0xffff0000, v17
+; SI-NEXT: v_add_f32_e32 v16, 0x40c00000, v16
+; SI-NEXT: v_add_f32_e32 v15, 0x40c00000, v15
+; SI-NEXT: v_lshrrev_b32_e32 v16, 16, v16
+; SI-NEXT: v_lshr_b64 v[15:16], v[15:16], 16
; SI-NEXT: .LBB83_3: ; %end
; SI-NEXT: buffer_load_dword v63, off, s[0:3], s32 ; 4-byte Folded Reload
; SI-NEXT: buffer_load_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
index 447da3d26f793..ce541dd2954f4 100644
--- a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
+++ b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
@@ -979,7 +979,7 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 {
; GCN-NEXT: v_cmp_gt_i32_e32 vcc, s4, v0
; GCN-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
; GCN-NEXT: s_mov_b64 s[8:9], 0
-; GCN-NEXT: v_mov_b32_e32 v0, 0
+; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_mov_b64 s[6:7], 0
; GCN-NEXT: s_branch .LBB5_3
; GCN-NEXT: .LBB5_1: ; %Flow
@@ -1002,36 +1002,45 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 {
; GCN-NEXT: ; %bb.4: ; %bb2
; GCN-NEXT: ; in Loop: Header=BB5_3 Depth=1
; GCN-NEXT: s_or_b64 exec, exec, s[6:7]
-; GCN-NEXT: v_mov_b32_e32 v1, v0
-; GCN-NEXT: v_mov_b32_e32 v2, v0
-; GCN-NEXT: v_mov_b32_e32 v3, v0
+; GCN-NEXT: v_mov_b32_e32 v2, v1
+; GCN-NEXT: v_mov_b32_e32 v3, v1
+; GCN-NEXT: v_mov_b32_e32 v4, v1
+; GCN-NEXT: v_mov_b32_e32 v5, v4
+; GCN-NEXT: v_mov_b32_e32 v4, v3
+; GCN-NEXT: v_mov_b32_e32 v3, v2
+; GCN-NEXT: v_mov_b32_e32 v2, v1
; GCN-NEXT: s_and_saveexec_b64 s[10:11], s[4:5]
; GCN-NEXT: s_cbranch_execz .LBB5_2
; GCN-NEXT: ; %bb.5: ; %bb4
; GCN-NEXT: ; in Loop: Header=BB5_3 Depth=1
-; GCN-NEXT: buffer_load_dword v3, v0, s[0:3], 0 offen
-; GCN-NEXT: v_mov_b32_e32 v1, v0
-; GCN-NEXT: v_mov_b32_e32 v2, v0
+; GCN-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen
+; GCN-NEXT: v_mov_b32_e32 v2, v1
+; GCN-NEXT: v_mov_b32_e32 v3, v1
+; GCN-NEXT: v_mov_b32_e32 v4, v1
+; GCN-NEXT: v_mov_b32_e32 v5, v4
+; GCN-NEXT: v_mov_b32_e32 v4, v3
+; GCN-NEXT: v_mov_b32_e32 v3, v2
+; GCN-NEXT: v_mov_b32_e32 v2, v1
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_cmp_gt_f32_e64 s[6:7], 0, v3
-; GCN-NEXT: v_mov_b32_e32 v3, v0
+; GCN-NEXT: v_cmp_gt_f32_e64 s[6:7], 0, v0
; GCN-NEXT: s_and_saveexec_b64 s[12:13], s[6:7]
; GCN-NEXT: s_cbranch_execz .LBB5_1
; GCN-NEXT: ; %bb.6: ; %bb8
; GCN-NEXT: ; in Loop: Header=BB5_3 Depth=1
-; GCN-NEXT: v_mov_b32_e32 v1, v0
-; GCN-NEXT: ; implicit-def: $vgpr2
-; GCN-NEXT: ; implicit-def: $vgpr3
+; GCN-NEXT: v_mov_b32_e32 v5, v3
+; GCN-NEXT: v_mov_b32_e32 v4, v2
+; GCN-NEXT: v_mov_b32_e32 v3, v1
+; GCN-NEXT: v_mov_b32_e32 v2, v0
; GCN-NEXT: s_branch .LBB5_1
; GCN-NEXT: .LBB5_7: ; %bb12
; GCN-NEXT: s_or_b64 exec, exec, s[8:9]
-; GCN-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen
+; GCN-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen
+; GCN-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
+; GCN-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: buffer_store_dword v0, v0, s[0:3], 0 offen
+; GCN-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
;
diff --git a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-nondeterminism.ll b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-nondeterminism.ll
index 1e469b1951009..4196a9056a521 100644
--- a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-nondeterminism.ll
+++ b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-nondeterminism.ll
@@ -8,25 +8,21 @@ define amdgpu_gs void @f(i32 inreg %arg, i32 %arg1, i32 %arg2) {
; CHECK-NEXT: s_mov_b32 s0, 0
; CHECK-NEXT: s_cbranch_scc1 .LBB0_2
; CHECK-NEXT: ; %bb.1: ; %bb3
-; CHECK-NEXT: v_mov_b32_e32 v5, v0
+; CHECK-NEXT: v_mov_b32_e32 v2, v0
; CHECK-NEXT: s_branch .LBB0_3
; CHECK-NEXT: .LBB0_2:
-; CHECK-NEXT: v_mov_b32_e32 v1, 0
-; CHECK-NEXT: v_mov_b32_e32 v5, 1
+; CHECK-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 1
; CHECK-NEXT: .LBB0_3: ; %bb4
-; CHECK-NEXT: v_mov_b32_e32 v6, 0
+; CHECK-NEXT: v_mov_b32_e32 v3, 0
; CHECK-NEXT: s_mov_b32 s1, s0
; CHECK-NEXT: s_mov_b32 s2, s0
; CHECK-NEXT: s_mov_b32 s3, s0
; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; CHECK-NEXT: v_mov_b32_e32 v7, v6
-; CHECK-NEXT: v_mov_b32_e32 v8, v6
-; CHECK-NEXT: v_mov_b32_e32 v2, v6
-; CHECK-NEXT: v_mov_b32_e32 v3, v6
-; CHECK-NEXT: v_mov_b32_e32 v4, v6
-; CHECK-NEXT: s_clause 0x1
-; CHECK-NEXT: buffer_store_b128 v[5:8], v6, s[0:3], 0 idxen
-; CHECK-NEXT: buffer_store_b128 v[1:4], v6, s[0:3], 0 idxen
+; CHECK-NEXT: v_mov_b32_e32 v4, v3
+; CHECK-NEXT: v_mov_b32_e32 v5, v3
+; CHECK-NEXT: buffer_store_b128 v[2:5], v3, s[0:3], 0 idxen
+; CHECK-NEXT: v_mov_b32_e32 v2, v3
+; CHECK-NEXT: buffer_store_b128 v[1:4], v3, s[0:3], 0 idxen
; CHECK-NEXT: s_endpgm
bb:
%i = icmp eq i32 %arg, 0
diff --git a/llvm/test/CodeGen/AMDGPU/freeze.ll b/llvm/test/CodeGen/AMDGPU/freeze.ll
index 308e86bbaf8fd..7df250d1fc1b4 100644
--- a/llvm/test/CodeGen/AMDGPU/freeze.ll
+++ b/llvm/test/CodeGen/AMDGPU/freeze.ll
@@ -8203,14 +8203,12 @@ define void @freeze_v3p0(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
-; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16
-; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[6:9], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1)
-; GFX6-GISEL-NEXT: v_mov_b32_e32 v0, v4
-; GFX6-GISEL-NEXT: v_mov_b32_e32 v1, v5
-; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[6:9], v[2:3], s[4:7], 0 addr64
-; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[8:9], v[2:3], s[4:7], 0 addr64 offset:16
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -8236,47 +8234,40 @@ define void @freeze_v3p0(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
-; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16
-; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[6:9], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1)
-; GFX7-GISEL-NEXT: v_mov_b32_e32 v0, v4
-; GFX7-GISEL-NEXT: v_mov_b32_e32 v1, v5
-; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[6:9], v[2:3], s[4:7], 0 addr64
-; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[8:9], v[2:3], s[4:7], 0 addr64 offset:16
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-GISEL-LABEL: freeze_v3p0:
; GFX8-GISEL: ; %bb.0:
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 16, v0
-; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
-; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[8:9]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[0:1]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1)
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
-; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 16, v2
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1)
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, v8
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, v9
-; GFX8-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
-; GFX8-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; GFX8-GISEL-NEXT: flat_store_dwordx2 v[0:1], v[8:9]
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-GISEL-LABEL: freeze_v3p0:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:16
-; GFX9-GISEL-NEXT: global_load_dwordx4 v[6:9], v[0:1], off
-; GFX9-GISEL-NEXT: ; kill: killed $vgpr0 killed $vgpr1
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1)
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, v4
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, v5
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[6:9], off
-; GFX9-GISEL-NEXT: global_store_dwordx2 v[2:3], v[0:1], off offset:16
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX9-GISEL-NEXT: global_store_dwordx2 v[2:3], v[8:9], off offset:16
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -8296,15 +8287,12 @@ define void @freeze_v3p0(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: s_clause 0x1
-; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:16
-; GFX10-GISEL-NEXT: global_load_dwordx4 v[6:9], v[0:1], off
-; GFX10-GISEL-NEXT: ; kill: killed $vgpr0 killed $vgpr1
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1)
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, v4
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, v5
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[6:9], off
-; GFX10-GISEL-NEXT: global_store_dwordx2 v[2:3], v[0:1], off offset:16
+; GFX10-GISEL-NEXT: global_store_dwordx2 v[2:3], v[8:9], off offset:16
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: freeze_v3p0:
@@ -8323,14 +8311,12 @@ define void @freeze_v3p0(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT: s_clause 0x1
-; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off offset:16
-; GFX11-GISEL-NEXT: global_load_b128 v[6:9], v[0:1], off
+; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off
+; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1)
-; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX11-GISEL-NEXT: s_clause 0x1
-; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[6:9], off
-; GFX11-GISEL-NEXT: global_store_b64 v[2:3], v[0:1], off offset:16
+; GFX11-GISEL-NEXT: global_store_b64 v[2:3], v[8:9], off offset:16
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%a = load <3 x ptr>, ptr addrspace(1) %ptra
%freeze = freeze <3 x ptr> %a
@@ -9251,14 +9237,12 @@ define void @freeze_v3p1(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
-; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16
-; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[6:9], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1)
-; GFX6-GISEL-NEXT: v_mov_b32_e32 v0, v4
-; GFX6-GISEL-NEXT: v_mov_b32_e32 v1, v5
-; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[6:9], v[2:3], s[4:7], 0 addr64
-; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX6-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX6-GISEL-NEXT: buffer_store_dwordx2 v[8:9], v[2:3], s[4:7], 0 addr64 offset:16
; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -9284,47 +9268,40 @@ define void @freeze_v3p1(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
-; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16
-; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[6:9], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:16
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1)
-; GFX7-GISEL-NEXT: v_mov_b32_e32 v0, v4
-; GFX7-GISEL-NEXT: v_mov_b32_e32 v1, v5
-; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[6:9], v[2:3], s[4:7], 0 addr64
-; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[0:1], v[2:3], s[4:7], 0 addr64 offset:16
+; GFX7-GISEL-NEXT: buffer_store_dwordx4 v[4:7], v[2:3], s[4:7], 0 addr64
+; GFX7-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX7-GISEL-NEXT: buffer_store_dwordx2 v[8:9], v[2:3], s[4:7], 0 addr64 offset:16
; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-GISEL-LABEL: freeze_v3p1:
; GFX8-GISEL: ; %bb.0:
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 16, v0
-; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc
; GFX8-GISEL-NEXT: flat_load_dwordx4 v[4:7], v[0:1]
-; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[8:9]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v0
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GFX8-GISEL-NEXT: flat_load_dwordx4 v[8:11], v[0:1]
+; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 16, v2
+; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1)
; GFX8-GISEL-NEXT: flat_store_dwordx4 v[2:3], v[4:7]
-; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 16, v2
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(1)
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, v8
-; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, v9
-; GFX8-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
-; GFX8-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
+; GFX8-GISEL-NEXT: flat_store_dwordx2 v[0:1], v[8:9]
; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-GISEL-LABEL: freeze_v3p1:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:16
-; GFX9-GISEL-NEXT: global_load_dwordx4 v[6:9], v[0:1], off
-; GFX9-GISEL-NEXT: ; kill: killed $vgpr0 killed $vgpr1
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX9-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1)
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, v4
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, v5
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[6:9], off
-; GFX9-GISEL-NEXT: global_store_dwordx2 v[2:3], v[0:1], off offset:16
+; GFX9-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(1)
+; GFX9-GISEL-NEXT: global_store_dwordx2 v[2:3], v[8:9], off offset:16
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -9344,15 +9321,12 @@ define void @freeze_v3p1(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: s_clause 0x1
-; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:16
-; GFX10-GISEL-NEXT: global_load_dwordx4 v[6:9], v[0:1], off
-; GFX10-GISEL-NEXT: ; kill: killed $vgpr0 killed $vgpr1
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[4:7], v[0:1], off
+; GFX10-GISEL-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:16
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(1)
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, v4
-; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, v5
+; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[4:7], off
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX10-GISEL-NEXT: global_store_dwordx4 v[2:3], v[6:9], off
-; GFX10-GISEL-NEXT: global_store_dwordx2 v[2:3], v[0:1], off offset:16
+; GFX10-GISEL-NEXT: global_store_dwordx2 v[2:3], v[8:9], off offset:16
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: freeze_v3p1:
@@ -9371,14 +9345,12 @@ define void @freeze_v3p1(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb) {
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT: s_clause 0x1
-; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off offset:16
-; GFX11-GISEL-NEXT: global_load_b128 v[6:9], v[0:1], off
+; GFX11-GISEL-NEXT: global_load_b128 v[4:7], v[0:1], off
+; GFX11-GISEL-NEXT: global_load_b128 v[8:11], v[0:1], off offset:16
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(1)
-; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5
+; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[4:7], off
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX11-GISEL-NEXT: s_clause 0x1
-; GFX11-GISEL-NEXT: global_store_b128 v[2:3], v[6:9], off
-; GFX11-GISEL-NEXT: global_store_b64 v[2:3], v[0:1], off offset:16
+; GFX11-GISEL-NEXT: global_store_b64 v[2:3], v[8:9], off offset:16
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%a = load <3 x ptr addrspace(1)>, ptr addrspace(1) %ptra
%freeze = freeze <3 x ptr addrspace(1)> %a
diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll
index ba81446a4bc09..20666560a7ec7 100644
--- a/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll
+++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll
@@ -3182,12 +3182,12 @@ define amdgpu_gfx void @call_72xi32() #1 {
; GFX11-LABEL: call_72xi32:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_mov_b32 s35, s33
+; GFX11-NEXT: s_mov_b32 s38, s33
; GFX11-NEXT: s_add_i32 s33, s32, 0x1ff
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_and_b32 s33, s33, 0xfffffe00
; GFX11-NEXT: s_or_saveexec_b32 s0, -1
-; GFX11-NEXT: scratch_store_b32 off, v60, s33 offset:1600 ; 4-byte Folded Spill
+; GFX11-NEXT: scratch_store_b32 off, v62, s33 offset:1600 ; 4-byte Folded Spill
; GFX11-NEXT: s_mov_b32 exec_lo, s0
; GFX11-NEXT: s_mov_b32 s0, 0
; GFX11-NEXT: v_mov_b32_e32 v4, 0
@@ -3196,22 +3196,24 @@ define amdgpu_gfx void @call_72xi32() #1 {
; GFX11-NEXT: s_mov_b32 s3, s0
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
-; GFX11-NEXT: s_mov_b32 s36, s34
+; GFX11-NEXT: s_mov_b32 s39, s34
; GFX11-NEXT: s_mov_b32 s34, s32
; GFX11-NEXT: s_addk_i32 s32, 0xa00
-; GFX11-NEXT: s_clause 0xb ; 48-byte Folded Spill
-; GFX11-NEXT: scratch_store_b32 off, v40, s33 offset:44
-; GFX11-NEXT: scratch_store_b32 off, v41, s33 offset:40
-; GFX11-NEXT: scratch_store_b32 off, v42, s33 offset:36
-; GFX11-NEXT: scratch_store_b32 off, v43, s33 offset:32
-; GFX11-NEXT: scratch_store_b32 off, v44, s33 offset:28
-; GFX11-NEXT: scratch_store_b32 off, v45, s33 offset:24
-; GFX11-NEXT: scratch_store_b32 off, v46, s33 offset:20
-; GFX11-NEXT: scratch_store_b32 off, v47, s33 offset:16
-; GFX11-NEXT: scratch_store_b32 off, v56, s33 offset:12
-; GFX11-NEXT: scratch_store_b32 off, v57, s33 offset:8
-; GFX11-NEXT: scratch_store_b32 off, v58, s33 offset:4
-; GFX11-NEXT: scratch_store_b32 off, v59, s33
+; GFX11-NEXT: s_clause 0xd ; 56-byte Folded Spill
+; GFX11-NEXT: scratch_store_b32 off, v40, s33 offset:52
+; GFX11-NEXT: scratch_store_b32 off, v41, s33 offset:48
+; GFX11-NEXT: scratch_store_b32 off, v42, s33 offset:44
+; GFX11-NEXT: scratch_store_b32 off, v43, s33 offset:40
+; GFX11-NEXT: scratch_store_b32 off, v44, s33 offset:36
+; GFX11-NEXT: scratch_store_b32 off, v45, s33 offset:32
+; GFX11-NEXT: scratch_store_b32 off, v46, s33 offset:28
+; GFX11-NEXT: scratch_store_b32 off, v47, s33 offset:24
+; GFX11-NEXT: scratch_store_b32 off, v56, s33 offset:20
+; GFX11-NEXT: scratch_store_b32 off, v57, s33 offset:16
+; GFX11-NEXT: scratch_store_b32 off, v58, s33 offset:12
+; GFX11-NEXT: scratch_store_b32 off, v59, s33 offset:8
+; GFX11-NEXT: scratch_store_b32 off, v60, s33 offset:4
+; GFX11-NEXT: scratch_store_b32 off, v61, s33
; GFX11-NEXT: s_add_i32 s0, s32, 0xa0
; GFX11-NEXT: s_add_i32 s1, s32, 0x90
; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32
@@ -3232,7 +3234,7 @@ define amdgpu_gfx void @call_72xi32() #1 {
; GFX11-NEXT: s_add_i32 s0, s32, 32
; GFX11-NEXT: s_add_i32 s1, s32, 16
; GFX11-NEXT: s_add_i32 s2, s33, 0x200
-; GFX11-NEXT: v_writelane_b32 v60, s30, 0
+; GFX11-NEXT: v_writelane_b32 v62, s30, 0
; GFX11-NEXT: scratch_store_b128 off, v[0:3], s0
; GFX11-NEXT: scratch_store_b128 off, v[0:3], s1
; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v3, 0
@@ -3253,126 +3255,106 @@ define amdgpu_gfx void @call_72xi32() #1 {
; GFX11-NEXT: v_dual_mov_b32 v31, 0 :: v_dual_mov_b32 v30, 0
; GFX11-NEXT: s_mov_b32 s1, return_72xi32@abs32@hi
; GFX11-NEXT: s_mov_b32 s0, return_72xi32@abs32@lo
-; GFX11-NEXT: v_writelane_b32 v60, s31, 1
+; GFX11-NEXT: v_writelane_b32 v62, s31, 1
; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX11-NEXT: s_clause 0x1
-; GFX11-NEXT: scratch_load_b128 v[45:48], off, s33 offset:624
-; GFX11-NEXT: scratch_load_b128 v[33:36], off, s33 offset:640
+; GFX11-NEXT: s_clause 0xb
+; GFX11-NEXT: scratch_load_b128 v[43:46], off, s33 offset:624
+; GFX11-NEXT: scratch_load_b128 v[47:50], off, s33 offset:640
+; GFX11-NEXT: scratch_load_b128 v[16:19], off, s33 offset:784
+; GFX11-NEXT: scratch_load_b128 v[12:15], off, s33 offset:768
+; GFX11-NEXT: scratch_load_b128 v[8:11], off, s33 offset:752
+; GFX11-NEXT: scratch_load_b128 v[4:7], off, s33 offset:736
+; GFX11-NEXT: scratch_load_b128 v[0:3], off, s33 offset:720
+; GFX11-NEXT: scratch_load_b128 v[24:27], off, s33 offset:656
+; GFX11-NEXT: scratch_load_b128 v[36:39], off, s33 offset:704
+; GFX11-NEXT: scratch_load_b128 v[32:35], off, s33 offset:688
+; GFX11-NEXT: scratch_load_b128 v[28:31], off, s33 offset:672
+; GFX11-NEXT: scratch_load_b128 v[20:23], off, s33 offset:512
+; GFX11-NEXT: s_waitcnt vmcnt(6)
+; GFX11-NEXT: v_mov_b32_e32 v6, 24
; GFX11-NEXT: s_add_i32 s2, s32, 0xa0
-; GFX11-NEXT: s_waitcnt vmcnt(1)
-; GFX11-NEXT: v_mov_b32_e32 v32, v48
-; GFX11-NEXT: s_clause 0x9
-; GFX11-NEXT: scratch_load_b128 v[48:51], off, s33 offset:656
-; GFX11-NEXT: scratch_load_b128 v[52:55], off, s33 offset:672
-; GFX11-NEXT: scratch_load_b128 v[37:40], off, s33 offset:688
-; GFX11-NEXT: scratch_load_b128 v[41:44], off, s33 offset:704
-; GFX11-NEXT: scratch_load_b128 v[56:59], off, s33 offset:720
-; GFX11-NEXT: scratch_load_b128 v[12:15], off, s33 offset:736
-; GFX11-NEXT: scratch_load_b128 v[0:3], off, s33 offset:752
-; GFX11-NEXT: scratch_load_b128 v[4:7], off, s33 offset:768
-; GFX11-NEXT: scratch_load_b128 v[8:11], off, s33 offset:784
-; GFX11-NEXT: scratch_load_b128 v[16:19], off, s33 offset:512
-; GFX11-NEXT: s_waitcnt vmcnt(2)
-; GFX11-NEXT: v_dual_mov_b32 v14, v1 :: v_dual_mov_b32 v1, v4
+; GFX11-NEXT: s_add_i32 s3, s32, 0x90
+; GFX11-NEXT: s_add_i32 s35, s32, 0x80
+; GFX11-NEXT: s_add_i32 s36, s32, 0x70
+; GFX11-NEXT: s_add_i32 s37, s32, 0x6c
+; GFX11-NEXT: s_waitcnt vmcnt(4)
+; GFX11-NEXT: v_dual_mov_b32 v40, v0 :: v_dual_mov_b32 v53, v26
+; GFX11-NEXT: v_dual_mov_b32 v41, v1 :: v_dual_mov_b32 v42, v2
+; GFX11-NEXT: v_mov_b32_e32 v51, v24
; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: scratch_store_b128 off, v[16:19], s33 offset:1584 ; 16-byte Folded Spill
-; GFX11-NEXT: s_clause 0x3
-; GFX11-NEXT: scratch_load_b128 v[16:19], off, s33 offset:528
-; GFX11-NEXT: scratch_load_b128 v[20:23], off, s33 offset:544
-; GFX11-NEXT: scratch_load_b128 v[24:27], off, s33 offset:560
-; GFX11-NEXT: scratch_load_b128 v[28:31], off, s33 offset:576
-; GFX11-NEXT: v_dual_mov_b32 v4, v7 :: v_dual_mov_b32 v7, v10
-; GFX11-NEXT: s_waitcnt vmcnt(2)
-; GFX11-NEXT: v_mov_b32_e32 v10, v21
+; GFX11-NEXT: scratch_store_b128 off, v[20:23], s33 offset:1584 ; 16-byte Folded Spill
+; GFX11-NEXT: scratch_load_b128 v[20:23], off, s33 offset:528
+; GFX11-NEXT: v_mov_b32_e32 v52, v25
; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: scratch_store_b128 off, v[28:31], s33 offset:1568 ; 16-byte Folded Spill
-; GFX11-NEXT: scratch_load_b128 v[28:31], off, s33 offset:592
+; GFX11-NEXT: scratch_store_b128 off, v[20:23], s33 offset:1536 ; 16-byte Folded Spill
+; GFX11-NEXT: scratch_load_b128 v[20:23], off, s33 offset:544
; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: scratch_store_b128 off, v[28:31], s33 offset:1552 ; 16-byte Folded Spill
-; GFX11-NEXT: scratch_load_b128 v[28:31], off, s33 offset:608
+; GFX11-NEXT: scratch_store_b128 off, v[20:23], s33 offset:1568 ; 16-byte Folded Spill
+; GFX11-NEXT: scratch_load_b128 v[20:23], off, s33 offset:560
; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: scratch_store_b128 off, v[28:31], s33 offset:1536 ; 16-byte Folded Spill
-; GFX11-NEXT: scratch_store_b128 off, v[32:35], s32
-; GFX11-NEXT: v_mov_b32_e32 v32, v36
-; GFX11-NEXT: v_dual_mov_b32 v33, v48 :: v_dual_mov_b32 v34, v49
-; GFX11-NEXT: v_dual_mov_b32 v35, v50 :: v_dual_mov_b32 v48, v51
-; GFX11-NEXT: v_dual_mov_b32 v49, v52 :: v_dual_mov_b32 v50, v53
-; GFX11-NEXT: v_dual_mov_b32 v51, v54 :: v_dual_mov_b32 v36, v55
-; GFX11-NEXT: v_dual_mov_b32 v53, v41 :: v_dual_mov_b32 v52, v40
-; GFX11-NEXT: v_dual_mov_b32 v54, v42 :: v_dual_mov_b32 v41, v56
-; GFX11-NEXT: v_dual_mov_b32 v55, v43 :: v_dual_mov_b32 v40, v44
-; GFX11-NEXT: v_dual_mov_b32 v42, v57 :: v_dual_mov_b32 v57, v12
-; GFX11-NEXT: v_dual_mov_b32 v43, v58 :: v_dual_mov_b32 v56, v59
-; GFX11-NEXT: v_mov_b32_e32 v58, v13
-; GFX11-NEXT: v_dual_mov_b32 v12, v15 :: v_dual_mov_b32 v13, v0
-; GFX11-NEXT: v_dual_mov_b32 v15, v2 :: v_dual_mov_b32 v0, v3
-; GFX11-NEXT: v_dual_mov_b32 v2, v5 :: v_dual_mov_b32 v3, v6
-; GFX11-NEXT: v_dual_mov_b32 v5, v8 :: v_dual_mov_b32 v6, v9
-; GFX11-NEXT: v_mov_b32_e32 v9, v20
-; GFX11-NEXT: scratch_store_b32 off, v11, s2
-; GFX11-NEXT: s_add_i32 s2, s32, 0x90
-; GFX11-NEXT: v_mov_b32_e32 v11, v22
-; GFX11-NEXT: scratch_store_b128 off, v[4:7], s2
-; GFX11-NEXT: s_add_i32 s2, s32, 0x80
-; GFX11-NEXT: v_mov_b32_e32 v5, v16
-; GFX11-NEXT: scratch_store_b128 off, v[0:3], s2
-; GFX11-NEXT: v_mov_b32_e32 v0, 24
-; GFX11-NEXT: s_add_i32 s2, s32, 0x70
-; GFX11-NEXT: v_mov_b32_e32 v6, v17
-; GFX11-NEXT: scratch_store_b128 off, v[12:15], s2
-; GFX11-NEXT: s_add_i32 s2, s32, 0x6c
-; GFX11-NEXT: v_mov_b32_e32 v7, v18
-; GFX11-NEXT: scratch_store_b32 off, v0, s2
+; GFX11-NEXT: scratch_store_b128 off, v[20:23], s33 offset:1552 ; 16-byte Folded Spill
+; GFX11-NEXT: s_clause 0x2
+; GFX11-NEXT: scratch_load_b128 v[20:23], off, s33 offset:576
+; GFX11-NEXT: scratch_load_b128 v[58:61], off, s33 offset:592
+; GFX11-NEXT: scratch_load_b128 v[54:57], off, s33 offset:608
+; GFX11-NEXT: scratch_store_b128 off, v[46:49], s32
+; GFX11-NEXT: scratch_store_b32 off, v19, s2
+; GFX11-NEXT: scratch_store_b128 off, v[15:18], s3
+; GFX11-NEXT: scratch_store_b128 off, v[11:14], s35
+; GFX11-NEXT: scratch_store_b128 off, v[7:10], s36
; GFX11-NEXT: s_add_i32 s2, s32, 0x60
-; GFX11-NEXT: v_dual_mov_b32 v8, v19 :: v_dual_mov_b32 v15, v26
-; GFX11-NEXT: scratch_store_b96 off, v[56:58], s2
+; GFX11-NEXT: scratch_store_b32 off, v6, s37
+; GFX11-NEXT: scratch_store_b96 off, v[3:5], s2
; GFX11-NEXT: s_add_i32 s2, s32, 0x50
-; GFX11-NEXT: v_dual_mov_b32 v12, v23 :: v_dual_mov_b32 v29, v45
-; GFX11-NEXT: scratch_store_b128 off, v[40:43], s2
-; GFX11-NEXT: s_add_i32 s2, s32, 64
-; GFX11-NEXT: v_mov_b32_e32 v13, v24
-; GFX11-NEXT: scratch_store_b128 off, v[52:55], s2
-; GFX11-NEXT: s_add_i32 s2, s32, 48
-; GFX11-NEXT: v_mov_b32_e32 v14, v25
-; GFX11-NEXT: scratch_store_b128 off, v[36:39], s2
-; GFX11-NEXT: s_add_i32 s2, s32, 32
-; GFX11-NEXT: v_mov_b32_e32 v16, v27
-; GFX11-NEXT: scratch_store_b128 off, v[48:51], s2
+; GFX11-NEXT: s_add_i32 s3, s32, 64
+; GFX11-NEXT: s_add_i32 s35, s32, 48
+; GFX11-NEXT: s_add_i32 s36, s32, 32
+; GFX11-NEXT: scratch_store_b128 off, v[39:42], s2
+; GFX11-NEXT: scratch_store_b128 off, v[35:38], s3
+; GFX11-NEXT: scratch_store_b128 off, v[31:34], s35
+; GFX11-NEXT: scratch_store_b128 off, v[27:30], s36
; GFX11-NEXT: s_add_i32 s2, s32, 16
-; GFX11-NEXT: v_mov_b32_e32 v30, v46
-; GFX11-NEXT: scratch_store_b128 off, v[32:35], s2
+; GFX11-NEXT: v_mov_b32_e32 v29, v43
+; GFX11-NEXT: scratch_store_b128 off, v[50:53], s2
; GFX11-NEXT: s_clause 0x3 ; 64-byte Folded Reload
-; GFX11-NEXT: scratch_load_b128 v[17:20], off, s33 offset:1568
-; GFX11-NEXT: scratch_load_b128 v[21:24], off, s33 offset:1552
-; GFX11-NEXT: scratch_load_b128 v[25:28], off, s33 offset:1536
; GFX11-NEXT: scratch_load_b128 v[1:4], off, s33 offset:1584
+; GFX11-NEXT: scratch_load_b128 v[5:8], off, s33 offset:1536
+; GFX11-NEXT: scratch_load_b128 v[9:12], off, s33 offset:1568
+; GFX11-NEXT: scratch_load_b128 v[13:16], off, s33 offset:1552
; GFX11-NEXT: s_add_i32 s2, s33, 0x400
-; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX11-NEXT: v_dual_mov_b32 v31, v47 :: v_dual_mov_b32 v0, s2
-; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: v_mov_b32_e32 v1, 42
+; GFX11-NEXT: v_dual_mov_b32 v30, v44 :: v_dual_mov_b32 v31, v45
+; GFX11-NEXT: s_waitcnt vmcnt(3)
+; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 42
+; GFX11-NEXT: v_dual_mov_b32 v17, v20 :: v_dual_mov_b32 v18, v21
+; GFX11-NEXT: v_dual_mov_b32 v19, v22 :: v_dual_mov_b32 v20, v23
+; GFX11-NEXT: v_dual_mov_b32 v21, v58 :: v_dual_mov_b32 v22, v59
+; GFX11-NEXT: v_dual_mov_b32 v23, v60 :: v_dual_mov_b32 v24, v61
+; GFX11-NEXT: v_dual_mov_b32 v25, v54 :: v_dual_mov_b32 v26, v55
+; GFX11-NEXT: v_dual_mov_b32 v27, v56 :: v_dual_mov_b32 v28, v57
; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX11-NEXT: s_clause 0xb ; 48-byte Folded Reload
-; GFX11-NEXT: scratch_load_b32 v59, off, s33
-; GFX11-NEXT: scratch_load_b32 v58, off, s33 offset:4
-; GFX11-NEXT: scratch_load_b32 v57, off, s33 offset:8
-; GFX11-NEXT: scratch_load_b32 v56, off, s33 offset:12
-; GFX11-NEXT: scratch_load_b32 v47, off, s33 offset:16
-; GFX11-NEXT: scratch_load_b32 v46, off, s33 offset:20
-; GFX11-NEXT: scratch_load_b32 v45, off, s33 offset:24
-; GFX11-NEXT: scratch_load_b32 v44, off, s33 offset:28
-; GFX11-NEXT: scratch_load_b32 v43, off, s33 offset:32
-; GFX11-NEXT: scratch_load_b32 v42, off, s33 offset:36
-; GFX11-NEXT: scratch_load_b32 v41, off, s33 offset:40
-; GFX11-NEXT: scratch_load_b32 v40, off, s33 offset:44
-; GFX11-NEXT: v_readlane_b32 s31, v60, 1
-; GFX11-NEXT: v_readlane_b32 s30, v60, 0
+; GFX11-NEXT: s_clause 0xd ; 56-byte Folded Reload
+; GFX11-NEXT: scratch_load_b32 v61, off, s33
+; GFX11-NEXT: scratch_load_b32 v60, off, s33 offset:4
+; GFX11-NEXT: scratch_load_b32 v59, off, s33 offset:8
+; GFX11-NEXT: scratch_load_b32 v58, off, s33 offset:12
+; GFX11-NEXT: scratch_load_b32 v57, off, s33 offset:16
+; GFX11-NEXT: scratch_load_b32 v56, off, s33 offset:20
+; GFX11-NEXT: scratch_load_b32 v47, off, s33 offset:24
+; GFX11-NEXT: scratch_load_b32 v46, off, s33 offset:28
+; GFX11-NEXT: scratch_load_b32 v45, off, s33 offset:32
+; GFX11-NEXT: scratch_load_b32 v44, off, s33 offset:36
+; GFX11-NEXT: scratch_load_b32 v43, off, s33 offset:40
+; GFX11-NEXT: scratch_load_b32 v42, off, s33 offset:44
+; GFX11-NEXT: scratch_load_b32 v41, off, s33 offset:48
+; GFX11-NEXT: scratch_load_b32 v40, off, s33 offset:52
+; GFX11-NEXT: v_readlane_b32 s31, v62, 1
+; GFX11-NEXT: v_readlane_b32 s30, v62, 0
; GFX11-NEXT: s_mov_b32 s32, s34
-; GFX11-NEXT: s_mov_b32 s34, s36
+; GFX11-NEXT: s_mov_b32 s34, s39
; GFX11-NEXT: s_or_saveexec_b32 s0, -1
-; GFX11-NEXT: scratch_load_b32 v60, off, s33 offset:1600 ; 4-byte Folded Reload
+; GFX11-NEXT: scratch_load_b32 v62, off, s33 offset:1600 ; 4-byte Folded Reload
; GFX11-NEXT: s_mov_b32 exec_lo, s0
-; GFX11-NEXT: s_mov_b32 s33, s35
+; GFX11-NEXT: s_mov_b32 s33, s38
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:
diff --git a/llvm/test/CodeGen/AMDGPU/lds-misaligned-bug.ll b/llvm/test/CodeGen/AMDGPU/lds-misaligned-bug.ll
index fa0568d307907..3d79bdc25336d 100644
--- a/llvm/test/CodeGen/AMDGPU/lds-misaligned-bug.ll
+++ b/llvm/test/CodeGen/AMDGPU/lds-misaligned-bug.ll
@@ -462,12 +462,13 @@ define amdgpu_kernel void @test_flat_misaligned_v3(ptr %arg) {
; ALIGNED-GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; ALIGNED-GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; ALIGNED-GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; ALIGNED-GFX10-NEXT: v_add_co_u32 v4, s0, s0, v0
-; ALIGNED-GFX10-NEXT: v_add_co_ci_u32_e64 v5, s0, s1, 0, s0
-; ALIGNED-GFX10-NEXT: flat_load_dwordx3 v[1:3], v[4:5]
+; ALIGNED-GFX10-NEXT: v_add_co_u32 v5, s0, s0, v0
+; ALIGNED-GFX10-NEXT: v_add_co_ci_u32_e64 v6, s0, s1, 0, s0
+; ALIGNED-GFX10-NEXT: flat_load_dwordx3 v[0:2], v[5:6]
; ALIGNED-GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; ALIGNED-GFX10-NEXT: v_mov_b32_e32 v0, v3
-; ALIGNED-GFX10-NEXT: flat_store_dwordx3 v[4:5], v[0:2]
+; ALIGNED-GFX10-NEXT: v_mov_b32_e32 v3, v0
+; ALIGNED-GFX10-NEXT: v_mov_b32_e32 v4, v1
+; ALIGNED-GFX10-NEXT: flat_store_dwordx3 v[5:6], v[2:4]
; ALIGNED-GFX10-NEXT: s_endpgm
;
; UNALIGNED-GFX10-LABEL: test_flat_misaligned_v3:
@@ -475,12 +476,13 @@ define amdgpu_kernel void @test_flat_misaligned_v3(ptr %arg) {
; UNALIGNED-GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; UNALIGNED-GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; UNALIGNED-GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; UNALIGNED-GFX10-NEXT: v_add_co_u32 v4, s0, s0, v0
-; UNALIGNED-GFX10-NEXT: v_add_co_ci_u32_e64 v5, s0, s1, 0, s0
-; UNALIGNED-GFX10-NEXT: flat_load_dwordx3 v[1:3], v[4:5]
+; UNALIGNED-GFX10-NEXT: v_add_co_u32 v5, s0, s0, v0
+; UNALIGNED-GFX10-NEXT: v_add_co_ci_u32_e64 v6, s0, s1, 0, s0
+; UNALIGNED-GFX10-NEXT: flat_load_dwordx3 v[0:2], v[5:6]
; UNALIGNED-GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; UNALIGNED-GFX10-NEXT: v_mov_b32_e32 v0, v3
-; UNALIGNED-GFX10-NEXT: flat_store_dwordx3 v[4:5], v[0:2]
+; UNALIGNED-GFX10-NEXT: v_mov_b32_e32 v3, v0
+; UNALIGNED-GFX10-NEXT: v_mov_b32_e32 v4, v1
+; UNALIGNED-GFX10-NEXT: flat_store_dwordx3 v[5:6], v[2:4]
; UNALIGNED-GFX10-NEXT: s_endpgm
;
; ALIGNED-GFX11-LABEL: test_flat_misaligned_v3:
@@ -490,13 +492,13 @@ define amdgpu_kernel void @test_flat_misaligned_v3(ptr %arg) {
; ALIGNED-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; ALIGNED-GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; ALIGNED-GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; ALIGNED-GFX11-NEXT: v_add_co_u32 v4, s0, s0, v0
+; ALIGNED-GFX11-NEXT: v_add_co_u32 v5, s0, s0, v0
; ALIGNED-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; ALIGNED-GFX11-NEXT: v_add_co_ci_u32_e64 v5, null, s1, 0, s0
-; ALIGNED-GFX11-NEXT: flat_load_b96 v[1:3], v[4:5]
+; ALIGNED-GFX11-NEXT: v_add_co_ci_u32_e64 v6, null, s1, 0, s0
+; ALIGNED-GFX11-NEXT: flat_load_b96 v[0:2], v[5:6]
; ALIGNED-GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; ALIGNED-GFX11-NEXT: v_mov_b32_e32 v0, v3
-; ALIGNED-GFX11-NEXT: flat_store_b96 v[4:5], v[0:2]
+; ALIGNED-GFX11-NEXT: v_dual_mov_b32 v3, v0 :: v_dual_mov_b32 v4, v1
+; ALIGNED-GFX11-NEXT: flat_store_b96 v[5:6], v[2:4]
; ALIGNED-GFX11-NEXT: s_endpgm
;
; UNALIGNED-GFX11-LABEL: test_flat_misaligned_v3:
@@ -506,13 +508,13 @@ define amdgpu_kernel void @test_flat_misaligned_v3(ptr %arg) {
; UNALIGNED-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; UNALIGNED-GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; UNALIGNED-GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; UNALIGNED-GFX11-NEXT: v_add_co_u32 v4, s0, s0, v0
+; UNALIGNED-GFX11-NEXT: v_add_co_u32 v5, s0, s0, v0
; UNALIGNED-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; UNALIGNED-GFX11-NEXT: v_add_co_ci_u32_e64 v5, null, s1, 0, s0
-; UNALIGNED-GFX11-NEXT: flat_load_b96 v[1:3], v[4:5]
+; UNALIGNED-GFX11-NEXT: v_add_co_ci_u32_e64 v6, null, s1, 0, s0
+; UNALIGNED-GFX11-NEXT: flat_load_b96 v[0:2], v[5:6]
; UNALIGNED-GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; UNALIGNED-GFX11-NEXT: v_mov_b32_e32 v0, v3
-; UNALIGNED-GFX11-NEXT: flat_store_b96 v[4:5], v[0:2]
+; UNALIGNED-GFX11-NEXT: v_dual_mov_b32 v3, v0 :: v_dual_mov_b32 v4, v1
+; UNALIGNED-GFX11-NEXT: flat_store_b96 v[5:6], v[2:4]
; UNALIGNED-GFX11-NEXT: s_endpgm
bb:
%lid = tail call i32 @llvm.amdgcn.workitem.id.x()
@@ -604,33 +606,36 @@ define amdgpu_kernel void @test_local_aligned_v3(ptr addrspace(3) %arg) {
; SPLIT: ; %bb.0: ; %bb
; SPLIT-NEXT: s_load_dword s0, s[4:5], 0x24
; SPLIT-NEXT: s_waitcnt lgkmcnt(0)
-; SPLIT-NEXT: v_lshl_add_u32 v4, v0, 2, s0
-; SPLIT-NEXT: ds_read_b96 v[1:3], v4
+; SPLIT-NEXT: v_lshl_add_u32 v5, v0, 2, s0
+; SPLIT-NEXT: ds_read_b96 v[0:2], v5
; SPLIT-NEXT: s_waitcnt lgkmcnt(0)
-; SPLIT-NEXT: v_mov_b32_e32 v0, v3
-; SPLIT-NEXT: ds_write_b96 v4, v[0:2]
+; SPLIT-NEXT: v_mov_b32_e32 v3, v0
+; SPLIT-NEXT: v_mov_b32_e32 v4, v1
+; SPLIT-NEXT: ds_write_b96 v5, v[2:4]
; SPLIT-NEXT: s_endpgm
;
; ALIGNED-GFX10-LABEL: test_local_aligned_v3:
; ALIGNED-GFX10: ; %bb.0: ; %bb
; ALIGNED-GFX10-NEXT: s_load_dword s0, s[4:5], 0x24
; ALIGNED-GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; ALIGNED-GFX10-NEXT: v_lshl_add_u32 v4, v0, 2, s0
-; ALIGNED-GFX10-NEXT: ds_read_b96 v[1:3], v4
+; ALIGNED-GFX10-NEXT: v_lshl_add_u32 v5, v0, 2, s0
+; ALIGNED-GFX10-NEXT: ds_read_b96 v[0:2], v5
; ALIGNED-GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; ALIGNED-GFX10-NEXT: v_mov_b32_e32 v0, v3
-; ALIGNED-GFX10-NEXT: ds_write_b96 v4, v[0:2]
+; ALIGNED-GFX10-NEXT: v_mov_b32_e32 v3, v0
+; ALIGNED-GFX10-NEXT: v_mov_b32_e32 v4, v1
+; ALIGNED-GFX10-NEXT: ds_write_b96 v5, v[2:4]
; ALIGNED-GFX10-NEXT: s_endpgm
;
; UNALIGNED-GFX10-LABEL: test_local_aligned_v3:
; UNALIGNED-GFX10: ; %bb.0: ; %bb
; UNALIGNED-GFX10-NEXT: s_load_dword s0, s[4:5], 0x24
; UNALIGNED-GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; UNALIGNED-GFX10-NEXT: v_lshl_add_u32 v4, v0, 2, s0
-; UNALIGNED-GFX10-NEXT: ds_read_b96 v[1:3], v4
+; UNALIGNED-GFX10-NEXT: v_lshl_add_u32 v5, v0, 2, s0
+; UNALIGNED-GFX10-NEXT: ds_read_b96 v[0:2], v5
; UNALIGNED-GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; UNALIGNED-GFX10-NEXT: v_mov_b32_e32 v0, v3
-; UNALIGNED-GFX10-NEXT: ds_write_b96 v4, v[0:2]
+; UNALIGNED-GFX10-NEXT: v_mov_b32_e32 v3, v0
+; UNALIGNED-GFX10-NEXT: v_mov_b32_e32 v4, v1
+; UNALIGNED-GFX10-NEXT: ds_write_b96 v5, v[2:4]
; UNALIGNED-GFX10-NEXT: s_endpgm
;
; ALIGNED-GFX11-LABEL: test_local_aligned_v3:
@@ -639,11 +644,11 @@ define amdgpu_kernel void @test_local_aligned_v3(ptr addrspace(3) %arg) {
; ALIGNED-GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; ALIGNED-GFX11-NEXT: s_waitcnt lgkmcnt(0)
; ALIGNED-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; ALIGNED-GFX11-NEXT: v_lshl_add_u32 v4, v0, 2, s0
-; ALIGNED-GFX11-NEXT: ds_load_b96 v[1:3], v4
+; ALIGNED-GFX11-NEXT: v_lshl_add_u32 v5, v0, 2, s0
+; ALIGNED-GFX11-NEXT: ds_load_b96 v[0:2], v5
; ALIGNED-GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; ALIGNED-GFX11-NEXT: v_mov_b32_e32 v0, v3
-; ALIGNED-GFX11-NEXT: ds_store_b96 v4, v[0:2]
+; ALIGNED-GFX11-NEXT: v_dual_mov_b32 v3, v0 :: v_dual_mov_b32 v4, v1
+; ALIGNED-GFX11-NEXT: ds_store_b96 v5, v[2:4]
; ALIGNED-GFX11-NEXT: s_endpgm
;
; UNALIGNED-GFX11-LABEL: test_local_aligned_v3:
@@ -652,11 +657,11 @@ define amdgpu_kernel void @test_local_aligned_v3(ptr addrspace(3) %arg) {
; UNALIGNED-GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; UNALIGNED-GFX11-NEXT: s_waitcnt lgkmcnt(0)
; UNALIGNED-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; UNALIGNED-GFX11-NEXT: v_lshl_add_u32 v4, v0, 2, s0
-; UNALIGNED-GFX11-NEXT: ds_load_b96 v[1:3], v4
+; UNALIGNED-GFX11-NEXT: v_lshl_add_u32 v5, v0, 2, s0
+; UNALIGNED-GFX11-NEXT: ds_load_b96 v[0:2], v5
; UNALIGNED-GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; UNALIGNED-GFX11-NEXT: v_mov_b32_e32 v0, v3
-; UNALIGNED-GFX11-NEXT: ds_store_b96 v4, v[0:2]
+; UNALIGNED-GFX11-NEXT: v_dual_mov_b32 v3, v0 :: v_dual_mov_b32 v4, v1
+; UNALIGNED-GFX11-NEXT: ds_store_b96 v5, v[2:4]
; UNALIGNED-GFX11-NEXT: s_endpgm
bb:
%lid = tail call i32 @llvm.amdgcn.workitem.id.x()
@@ -856,12 +861,11 @@ define amdgpu_kernel void @test_local_v4_aligned8(ptr addrspace(3) %arg) {
; SPLIT-NEXT: s_load_dword s0, s[4:5], 0x24
; SPLIT-NEXT: s_waitcnt lgkmcnt(0)
; SPLIT-NEXT: v_lshl_add_u32 v6, v0, 2, s0
-; SPLIT-NEXT: ds_read2_b64 v[0:3], v6 offset1:1
+; SPLIT-NEXT: ds_read2_b64 v[1:4], v6 offset1:1
; SPLIT-NEXT: s_waitcnt lgkmcnt(0)
-; SPLIT-NEXT: v_mov_b32_e32 v4, v1
-; SPLIT-NEXT: v_mov_b32_e32 v5, v0
-; SPLIT-NEXT: v_mov_b32_e32 v1, v3
-; SPLIT-NEXT: ds_write2_b64 v6, v[1:2], v[4:5] offset1:1
+; SPLIT-NEXT: v_mov_b32_e32 v0, v2
+; SPLIT-NEXT: v_mov_b32_e32 v5, v3
+; SPLIT-NEXT: ds_write2_b64 v6, v[4:5], v[0:1] offset1:1
; SPLIT-NEXT: s_endpgm
;
; ALIGNED-GFX10-LABEL: test_local_v4_aligned8:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.smfmac.gfx950.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.smfmac.gfx950.ll
index 347fddbedb0a7..25996ee11c5a1 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.smfmac.gfx950.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.smfmac.gfx950.ll
@@ -1003,10 +1003,10 @@ define amdgpu_kernel void @test_smfmac_i32_32x32x64_i8__vgpr(ptr addrspace(1) %a
; GISEL-NEXT: v_mov_b64_e32 v[26:27], s[10:11]
; GISEL-NEXT: v_mov_b64_e32 v[24:25], s[8:9]
; GISEL-NEXT: v_mov_b64_e32 v[22:23], s[18:19]
-; GISEL-NEXT: v_mov_b32_e32 v28, s2
; GISEL-NEXT: v_mov_b64_e32 v[20:21], s[16:17]
; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[14:15]
; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[12:13]
+; GISEL-NEXT: v_mov_b32_e32 v28, s2
; GISEL-NEXT: s_waitcnt vmcnt(0)
; GISEL-NEXT: s_nop 0
; GISEL-NEXT: v_smfmac_i32_32x32x64_i8 v[0:15], v[24:27], v[16:23], v28 cbsz:1 abid:2
@@ -2000,10 +2000,10 @@ define amdgpu_kernel void @test_smfmac_f32_32x32x64_bf8_bf8__vgpr(ptr addrspace(
; GISEL-NEXT: v_mov_b64_e32 v[26:27], s[10:11]
; GISEL-NEXT: v_mov_b64_e32 v[24:25], s[8:9]
; GISEL-NEXT: v_mov_b64_e32 v[22:23], s[18:19]
-; GISEL-NEXT: v_mov_b32_e32 v28, s2
; GISEL-NEXT: v_mov_b64_e32 v[20:21], s[16:17]
; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[14:15]
; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[12:13]
+; GISEL-NEXT: v_mov_b32_e32 v28, s2
; GISEL-NEXT: s_waitcnt vmcnt(0)
; GISEL-NEXT: s_nop 0
; GISEL-NEXT: v_smfmac_f32_32x32x64_bf8_bf8 v[0:15], v[24:27], v[16:23], v28 cbsz:1 abid:2
@@ -2349,10 +2349,10 @@ define amdgpu_kernel void @test_smfmac_f32_32x32x64_bf8_fp8__vgpr(ptr addrspace(
; GISEL-NEXT: v_mov_b64_e32 v[26:27], s[10:11]
; GISEL-NEXT: v_mov_b64_e32 v[24:25], s[8:9]
; GISEL-NEXT: v_mov_b64_e32 v[22:23], s[18:19]
-; GISEL-NEXT: v_mov_b32_e32 v28, s2
; GISEL-NEXT: v_mov_b64_e32 v[20:21], s[16:17]
; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[14:15]
; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[12:13]
+; GISEL-NEXT: v_mov_b32_e32 v28, s2
; GISEL-NEXT: s_waitcnt vmcnt(0)
; GISEL-NEXT: s_nop 0
; GISEL-NEXT: v_smfmac_f32_32x32x64_bf8_fp8 v[0:15], v[24:27], v[16:23], v28 cbsz:1 abid:2
@@ -2698,10 +2698,10 @@ define amdgpu_kernel void @test_smfmac_f32_32x32x64_fp8_bf8__vgpr(ptr addrspace(
; GISEL-NEXT: v_mov_b64_e32 v[26:27], s[10:11]
; GISEL-NEXT: v_mov_b64_e32 v[24:25], s[8:9]
; GISEL-NEXT: v_mov_b64_e32 v[22:23], s[18:19]
-; GISEL-NEXT: v_mov_b32_e32 v28, s2
; GISEL-NEXT: v_mov_b64_e32 v[20:21], s[16:17]
; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[14:15]
; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[12:13]
+; GISEL-NEXT: v_mov_b32_e32 v28, s2
; GISEL-NEXT: s_waitcnt vmcnt(0)
; GISEL-NEXT: s_nop 0
; GISEL-NEXT: v_smfmac_f32_32x32x64_fp8_bf8 v[0:15], v[24:27], v[16:23], v28 cbsz:1 abid:2
@@ -3047,10 +3047,10 @@ define amdgpu_kernel void @test_smfmac_f32_32x32x64_fp8_fp8__vgpr(ptr addrspace(
; GISEL-NEXT: v_mov_b64_e32 v[26:27], s[10:11]
; GISEL-NEXT: v_mov_b64_e32 v[24:25], s[8:9]
; GISEL-NEXT: v_mov_b64_e32 v[22:23], s[18:19]
-; GISEL-NEXT: v_mov_b32_e32 v28, s2
; GISEL-NEXT: v_mov_b64_e32 v[20:21], s[16:17]
; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[14:15]
; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[12:13]
+; GISEL-NEXT: v_mov_b32_e32 v28, s2
; GISEL-NEXT: s_waitcnt vmcnt(0)
; GISEL-NEXT: s_nop 0
; GISEL-NEXT: v_smfmac_f32_32x32x64_fp8_fp8 v[0:15], v[24:27], v[16:23], v28 cbsz:1 abid:2
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.writelane.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.writelane.ll
index a10c861601c2c..192b4983ed7f8 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.writelane.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.writelane.ll
@@ -3011,17 +3011,17 @@ define void @test_writelane_v3i64(ptr addrspace(1) %out, <3 x i64> %src, i32 %sr
; GFX802-GISEL: ; %bb.0:
; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX802-GISEL-NEXT: v_add_u32_e32 v17, vcc, 16, v0
-; GFX802-GISEL-NEXT: v_addc_u32_e32 v18, vcc, 0, v1, vcc
; GFX802-GISEL-NEXT: flat_load_dwordx4 v[9:12], v[0:1]
+; GFX802-GISEL-NEXT: v_addc_u32_e32 v18, vcc, 0, v1, vcc
; GFX802-GISEL-NEXT: flat_load_dwordx4 v[13:16], v[17:18]
; GFX802-GISEL-NEXT: v_readfirstlane_b32 s5, v8
; GFX802-GISEL-NEXT: v_readfirstlane_b32 s4, v2
; GFX802-GISEL-NEXT: v_readfirstlane_b32 s6, v3
; GFX802-GISEL-NEXT: v_readfirstlane_b32 s7, v4
; GFX802-GISEL-NEXT: v_readfirstlane_b32 s8, v5
+; GFX802-GISEL-NEXT: s_mov_b32 m0, s5
; GFX802-GISEL-NEXT: v_readfirstlane_b32 s9, v6
; GFX802-GISEL-NEXT: v_readfirstlane_b32 s10, v7
-; GFX802-GISEL-NEXT: s_mov_b32 m0, s5
; GFX802-GISEL-NEXT: s_waitcnt vmcnt(1)
; GFX802-GISEL-NEXT: v_writelane_b32 v9, s4, m0
; GFX802-GISEL-NEXT: v_writelane_b32 v10, s6, m0
@@ -3030,10 +3030,8 @@ define void @test_writelane_v3i64(ptr addrspace(1) %out, <3 x i64> %src, i32 %sr
; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX802-GISEL-NEXT: v_writelane_b32 v13, s9, m0
; GFX802-GISEL-NEXT: v_writelane_b32 v14, s10, m0
-; GFX802-GISEL-NEXT: v_mov_b32_e32 v2, v13
-; GFX802-GISEL-NEXT: v_mov_b32_e32 v3, v14
; GFX802-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[9:12]
-; GFX802-GISEL-NEXT: flat_store_dwordx2 v[17:18], v[2:3]
+; GFX802-GISEL-NEXT: flat_store_dwordx2 v[17:18], v[13:14]
; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX802-GISEL-NEXT: s_setpc_b64 s[30:31]
;
@@ -3043,25 +3041,23 @@ define void @test_writelane_v3i64(ptr addrspace(1) %out, <3 x i64> %src, i32 %sr
; GFX1010-GISEL-NEXT: s_clause 0x1
; GFX1010-GISEL-NEXT: global_load_dwordx4 v[9:12], v[0:1], off
; GFX1010-GISEL-NEXT: global_load_dwordx4 v[13:16], v[0:1], off offset:16
-; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s5, v8
-; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s7, v6
-; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s8, v7
; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s4, v2
+; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s5, v8
; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s6, v3
-; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s9, v4
-; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s10, v5
+; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s7, v4
+; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s8, v5
+; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s9, v6
+; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s10, v7
; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(1)
; GFX1010-GISEL-NEXT: v_writelane_b32 v9, s4, s5
-; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX1010-GISEL-NEXT: v_writelane_b32 v13, s7, s5
-; GFX1010-GISEL-NEXT: v_writelane_b32 v14, s8, s5
; GFX1010-GISEL-NEXT: v_writelane_b32 v10, s6, s5
-; GFX1010-GISEL-NEXT: v_writelane_b32 v11, s9, s5
-; GFX1010-GISEL-NEXT: v_writelane_b32 v12, s10, s5
-; GFX1010-GISEL-NEXT: v_mov_b32_e32 v2, v13
-; GFX1010-GISEL-NEXT: v_mov_b32_e32 v3, v14
+; GFX1010-GISEL-NEXT: v_writelane_b32 v11, s7, s5
+; GFX1010-GISEL-NEXT: v_writelane_b32 v12, s8, s5
+; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1010-GISEL-NEXT: v_writelane_b32 v13, s9, s5
+; GFX1010-GISEL-NEXT: v_writelane_b32 v14, s10, s5
; GFX1010-GISEL-NEXT: global_store_dwordx4 v[0:1], v[9:12], off
-; GFX1010-GISEL-NEXT: global_store_dwordx2 v[0:1], v[2:3], off offset:16
+; GFX1010-GISEL-NEXT: global_store_dwordx2 v[0:1], v[13:14], off offset:16
; GFX1010-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-GISEL-LABEL: test_writelane_v3i64:
@@ -3070,26 +3066,24 @@ define void @test_writelane_v3i64(ptr addrspace(1) %out, <3 x i64> %src, i32 %sr
; GFX1100-GISEL-NEXT: s_clause 0x1
; GFX1100-GISEL-NEXT: global_load_b128 v[9:12], v[0:1], off
; GFX1100-GISEL-NEXT: global_load_b128 v[13:16], v[0:1], off offset:16
-; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s1, v8
-; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s3, v6
-; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s4, v7
; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s0, v2
+; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s1, v8
; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s2, v3
-; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s5, v4
-; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s6, v5
+; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s3, v4
+; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s4, v5
+; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s5, v6
+; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s6, v7
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(1)
; GFX1100-GISEL-NEXT: v_writelane_b32 v9, s0, s1
-; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX1100-GISEL-NEXT: v_writelane_b32 v13, s3, s1
-; GFX1100-GISEL-NEXT: v_writelane_b32 v14, s4, s1
; GFX1100-GISEL-NEXT: v_writelane_b32 v10, s2, s1
-; GFX1100-GISEL-NEXT: v_writelane_b32 v11, s5, s1
-; GFX1100-GISEL-NEXT: v_writelane_b32 v12, s6, s1
-; GFX1100-GISEL-NEXT: v_mov_b32_e32 v2, v13
-; GFX1100-GISEL-NEXT: v_mov_b32_e32 v3, v14
+; GFX1100-GISEL-NEXT: v_writelane_b32 v11, s3, s1
+; GFX1100-GISEL-NEXT: v_writelane_b32 v12, s4, s1
+; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0)
+; GFX1100-GISEL-NEXT: v_writelane_b32 v13, s5, s1
+; GFX1100-GISEL-NEXT: v_writelane_b32 v14, s6, s1
; GFX1100-GISEL-NEXT: s_clause 0x1
; GFX1100-GISEL-NEXT: global_store_b128 v[0:1], v[9:12], off
-; GFX1100-GISEL-NEXT: global_store_b64 v[0:1], v[2:3], off offset:16
+; GFX1100-GISEL-NEXT: global_store_b64 v[0:1], v[13:14], off offset:16
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
%oldval = load <3 x i64>, ptr addrspace(1) %out
%writelane = call <3 x i64> @llvm.amdgcn.writelane.v2i64(<3 x i64> %src, i32 %src1, <3 x i64> %oldval)
diff --git a/llvm/test/CodeGen/AMDGPU/load-constant-i1.ll b/llvm/test/CodeGen/AMDGPU/load-constant-i1.ll
index 876940da7f575..689b38846c61b 100644
--- a/llvm/test/CodeGen/AMDGPU/load-constant-i1.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-constant-i1.ll
@@ -6511,85 +6511,84 @@ define amdgpu_kernel void @constant_zextload_v16i1_to_v16i64(ptr addrspace(1) %o
; GFX8-LABEL: constant_zextload_v16i1_to_v16i64:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX8-NEXT: v_mov_b32_e32 v3, 0
+; GFX8-NEXT: v_mov_b32_e32 v5, v3
+; GFX8-NEXT: v_mov_b32_e32 v7, v3
+; GFX8-NEXT: v_mov_b32_e32 v9, v3
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: flat_load_ushort v0, v[0:1]
-; GFX8-NEXT: v_mov_b32_e32 v1, 0
-; GFX8-NEXT: v_mov_b32_e32 v3, v1
-; GFX8-NEXT: v_mov_b32_e32 v5, v1
-; GFX8-NEXT: v_mov_b32_e32 v7, v1
-; GFX8-NEXT: v_mov_b32_e32 v9, v1
-; GFX8-NEXT: v_mov_b32_e32 v11, v1
+; GFX8-NEXT: v_mov_b32_e32 v1, v3
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_readfirstlane_b32 s2, v0
-; GFX8-NEXT: s_bfe_u32 s3, s2, 0x10009
-; GFX8-NEXT: s_bfe_u32 s4, s2, 0x1000d
-; GFX8-NEXT: s_bfe_u32 s5, s2, 0x10007
-; GFX8-NEXT: s_bfe_u32 s6, s2, 0x10003
-; GFX8-NEXT: s_bfe_u32 s7, s2, 0x10001
-; GFX8-NEXT: s_and_b32 s8, s2, 1
-; GFX8-NEXT: s_bfe_u32 s9, s2, 0x10002
-; GFX8-NEXT: s_bfe_u32 s10, s2, 0x10004
-; GFX8-NEXT: s_bfe_u32 s11, s2, 0x10006
-; GFX8-NEXT: s_bfe_u32 s12, s2, 0x1000c
+; GFX8-NEXT: v_and_b32_e32 v2, 0xffff, v0
+; GFX8-NEXT: s_bfe_u32 s4, s2, 0x10009
+; GFX8-NEXT: s_bfe_u32 s5, s2, 0x1000d
+; GFX8-NEXT: s_bfe_u32 s6, s2, 0x10007
+; GFX8-NEXT: s_bfe_u32 s7, s2, 0x10003
+; GFX8-NEXT: s_bfe_u32 s8, s2, 0x10001
+; GFX8-NEXT: s_and_b32 s9, s2, 1
+; GFX8-NEXT: s_bfe_u32 s10, s2, 0x10002
+; GFX8-NEXT: s_bfe_u32 s11, s2, 0x10004
+; GFX8-NEXT: s_bfe_u32 s12, s2, 0x10006
+; GFX8-NEXT: s_bfe_u32 s13, s2, 0x1000c
; GFX8-NEXT: s_bfe_u32 s2, s2, 0x1000a
-; GFX8-NEXT: v_and_b32_e32 v4, 0xffff, v0
-; GFX8-NEXT: v_mov_b32_e32 v0, s2
+; GFX8-NEXT: v_bfe_u32 v4, v2, 11, 1
+; GFX8-NEXT: v_lshrrev_b32_e32 v8, 15, v2
+; GFX8-NEXT: v_bfe_u32 v12, v2, 5, 1
+; GFX8-NEXT: v_bfe_u32 v6, v2, 14, 1
+; GFX8-NEXT: v_bfe_u32 v0, v2, 8, 1
+; GFX8-NEXT: v_mov_b32_e32 v2, s2
; GFX8-NEXT: s_add_u32 s2, s0, 0x50
-; GFX8-NEXT: v_mov_b32_e32 v6, s3
; GFX8-NEXT: s_addc_u32 s3, s1, 0
-; GFX8-NEXT: v_mov_b32_e32 v13, s3
-; GFX8-NEXT: v_mov_b32_e32 v12, s2
+; GFX8-NEXT: v_mov_b32_e32 v11, s3
+; GFX8-NEXT: v_mov_b32_e32 v10, s2
; GFX8-NEXT: s_add_u32 s2, s0, 64
-; GFX8-NEXT: v_bfe_u32 v2, v4, 11, 1
; GFX8-NEXT: s_addc_u32 s3, s1, 0
-; GFX8-NEXT: flat_store_dwordx4 v[12:13], v[0:3]
-; GFX8-NEXT: v_mov_b32_e32 v13, s3
-; GFX8-NEXT: v_mov_b32_e32 v12, s2
+; GFX8-NEXT: flat_store_dwordx4 v[10:11], v[2:5]
+; GFX8-NEXT: v_mov_b32_e32 v11, s3
+; GFX8-NEXT: v_mov_b32_e32 v2, s4
+; GFX8-NEXT: v_mov_b32_e32 v10, s2
; GFX8-NEXT: s_add_u32 s2, s0, 0x70
-; GFX8-NEXT: v_lshrrev_b32_e32 v10, 15, v4
-; GFX8-NEXT: v_bfe_u32 v14, v4, 5, 1
-; GFX8-NEXT: v_bfe_u32 v8, v4, 14, 1
-; GFX8-NEXT: v_bfe_u32 v4, v4, 8, 1
+; GFX8-NEXT: flat_store_dwordx4 v[10:11], v[0:3]
; GFX8-NEXT: s_addc_u32 s3, s1, 0
-; GFX8-NEXT: flat_store_dwordx4 v[12:13], v[4:7]
-; GFX8-NEXT: v_mov_b32_e32 v0, s12
-; GFX8-NEXT: v_mov_b32_e32 v5, s3
-; GFX8-NEXT: v_mov_b32_e32 v4, s2
+; GFX8-NEXT: v_mov_b32_e32 v0, s2
+; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: s_add_u32 s2, s0, 0x60
+; GFX8-NEXT: flat_store_dwordx4 v[0:1], v[6:9]
; GFX8-NEXT: s_addc_u32 s3, s1, 0
-; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[8:11]
-; GFX8-NEXT: v_mov_b32_e32 v5, s3
-; GFX8-NEXT: v_mov_b32_e32 v4, s2
+; GFX8-NEXT: v_mov_b32_e32 v0, s2
+; GFX8-NEXT: v_mov_b32_e32 v4, s5
+; GFX8-NEXT: v_mov_b32_e32 v2, s13
+; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: s_add_u32 s2, s0, 48
-; GFX8-NEXT: v_mov_b32_e32 v2, s4
+; GFX8-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
; GFX8-NEXT: s_addc_u32 s3, s1, 0
-; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
-; GFX8-NEXT: v_mov_b32_e32 v5, s3
-; GFX8-NEXT: v_mov_b32_e32 v4, s2
+; GFX8-NEXT: v_mov_b32_e32 v0, s2
+; GFX8-NEXT: v_mov_b32_e32 v2, s12
+; GFX8-NEXT: v_mov_b32_e32 v4, s6
+; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: s_add_u32 s2, s0, 32
-; GFX8-NEXT: v_mov_b32_e32 v0, s11
-; GFX8-NEXT: v_mov_b32_e32 v2, s5
+; GFX8-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
; GFX8-NEXT: s_addc_u32 s3, s1, 0
-; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
-; GFX8-NEXT: v_mov_b32_e32 v5, s3
-; GFX8-NEXT: v_mov_b32_e32 v4, s2
+; GFX8-NEXT: v_mov_b32_e32 v0, s2
+; GFX8-NEXT: v_mov_b32_e32 v2, s11
+; GFX8-NEXT: v_mov_b32_e32 v4, v12
+; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: s_add_u32 s2, s0, 16
-; GFX8-NEXT: v_mov_b32_e32 v0, s10
-; GFX8-NEXT: v_mov_b32_e32 v2, v14
+; GFX8-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
; GFX8-NEXT: s_addc_u32 s3, s1, 0
-; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
-; GFX8-NEXT: v_mov_b32_e32 v5, s3
-; GFX8-NEXT: v_mov_b32_e32 v0, s9
-; GFX8-NEXT: v_mov_b32_e32 v2, s6
-; GFX8-NEXT: v_mov_b32_e32 v4, s2
-; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
-; GFX8-NEXT: v_mov_b32_e32 v5, s1
-; GFX8-NEXT: v_mov_b32_e32 v0, s8
-; GFX8-NEXT: v_mov_b32_e32 v2, s7
-; GFX8-NEXT: v_mov_b32_e32 v4, s0
-; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
+; GFX8-NEXT: v_mov_b32_e32 v0, s2
+; GFX8-NEXT: v_mov_b32_e32 v2, s10
+; GFX8-NEXT: v_mov_b32_e32 v4, s7
+; GFX8-NEXT: v_mov_b32_e32 v1, s3
+; GFX8-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
+; GFX8-NEXT: v_mov_b32_e32 v0, s0
+; GFX8-NEXT: v_mov_b32_e32 v2, s9
+; GFX8-NEXT: v_mov_b32_e32 v4, s8
+; GFX8-NEXT: v_mov_b32_e32 v1, s1
+; GFX8-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
; GFX8-NEXT: s_endpgm
;
; EG-LABEL: constant_zextload_v16i1_to_v16i64:
@@ -6678,57 +6677,58 @@ define amdgpu_kernel void @constant_zextload_v16i1_to_v16i64(ptr addrspace(1) %o
; GFX12-LABEL: constant_zextload_v16i1_to_v16i64:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
-; GFX12-NEXT: v_mov_b32_e32 v1, 0
+; GFX12-NEXT: v_mov_b32_e32 v3, 0
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: global_load_u16 v0, v1, s[2:3]
+; GFX12-NEXT: global_load_u16 v0, v3, s[2:3]
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: v_readfirstlane_b32 s2, v0
-; GFX12-NEXT: v_dual_mov_b32 v7, v1 :: v_dual_and_b32 v4, 0xffff, v0
-; GFX12-NEXT: v_mov_b32_e32 v11, v1
+; GFX12-NEXT: v_dual_mov_b32 v7, v3 :: v_dual_and_b32 v6, 0xffff, v0
+; GFX12-NEXT: v_mov_b32_e32 v9, v3
; GFX12-NEXT: s_bfe_u32 s3, s2, 0x1000a
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX12-NEXT: v_dual_mov_b32 v3, v1 :: v_dual_mov_b32 v0, s3
-; GFX12-NEXT: v_bfe_u32 v2, v4, 11, 1
-; GFX12-NEXT: s_bfe_u32 s3, s2, 0x1000d
+; GFX12-NEXT: v_dual_mov_b32 v5, v3 :: v_dual_mov_b32 v2, s3
+; GFX12-NEXT: v_bfe_u32 v4, v6, 11, 1
+; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10009
+; GFX12-NEXT: v_mov_b32_e32 v1, v3
+; GFX12-NEXT: v_bfe_u32 v0, v6, 8, 1
; GFX12-NEXT: s_bfe_u32 s4, s2, 0x1000c
-; GFX12-NEXT: v_mov_b32_e32 v5, v1
-; GFX12-NEXT: v_bfe_u32 v6, v4, 5, 1
-; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:80
-; GFX12-NEXT: v_mov_b32_e32 v0, s4
+; GFX12-NEXT: global_store_b128 v3, v[2:5], s[0:1] offset:80
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_mov_b32_e32 v2, s3
+; GFX12-NEXT: s_bfe_u32 s3, s2, 0x1000d
+; GFX12-NEXT: v_lshrrev_b32_e32 v8, 15, v6
+; GFX12-NEXT: s_wait_alu 0xfffe
+; GFX12-NEXT: v_mov_b32_e32 v4, s3
; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10007
+; GFX12-NEXT: global_store_b128 v3, v[0:3], s[0:1] offset:64
+; GFX12-NEXT: v_mov_b32_e32 v2, s4
; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10006
-; GFX12-NEXT: v_mov_b32_e32 v9, v1
-; GFX12-NEXT: s_bfe_u32 s6, s2, 0x10002
-; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:96
+; GFX12-NEXT: v_bfe_u32 v0, v6, 5, 1
+; GFX12-NEXT: v_bfe_u32 v6, v6, 14, 1
+; GFX12-NEXT: global_store_b128 v3, v[2:5], s[0:1] offset:96
+; GFX12-NEXT: s_wait_alu 0xfffe
+; GFX12-NEXT: v_mov_b32_e32 v2, s4
+; GFX12-NEXT: v_mov_b32_e32 v4, s3
+; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10004
+; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10002
+; GFX12-NEXT: global_store_b128 v3, v[2:5], s[0:1] offset:48
; GFX12-NEXT: s_wait_alu 0xfffe
-; GFX12-NEXT: v_mov_b32_e32 v0, s4
; GFX12-NEXT: v_mov_b32_e32 v2, s3
-; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10004
-; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10009
-; GFX12-NEXT: s_bfe_u32 s5, s2, 0x10001
-; GFX12-NEXT: v_lshrrev_b32_e32 v10, 15, v4
-; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:48
+; GFX12-NEXT: v_mov_b32_e32 v4, v0
+; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10003
+; GFX12-NEXT: global_store_b128 v3, v[2:5], s[0:1] offset:32
+; GFX12-NEXT: v_mov_b32_e32 v2, s4
; GFX12-NEXT: s_wait_alu 0xfffe
-; GFX12-NEXT: v_mov_b32_e32 v0, s4
-; GFX12-NEXT: v_mov_b32_e32 v2, v6
-; GFX12-NEXT: s_bfe_u32 s4, s2, 0x10003
+; GFX12-NEXT: v_mov_b32_e32 v4, s3
+; GFX12-NEXT: s_bfe_u32 s3, s2, 0x10001
; GFX12-NEXT: s_and_b32 s2, s2, 1
-; GFX12-NEXT: v_bfe_u32 v8, v4, 14, 1
-; GFX12-NEXT: v_bfe_u32 v4, v4, 8, 1
-; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:32
-; GFX12-NEXT: v_mov_b32_e32 v0, s6
+; GFX12-NEXT: global_store_b128 v3, v[2:5], s[0:1] offset:16
; GFX12-NEXT: s_wait_alu 0xfffe
-; GFX12-NEXT: v_mov_b32_e32 v2, s4
-; GFX12-NEXT: v_mov_b32_e32 v6, s3
-; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:16
-; GFX12-NEXT: v_mov_b32_e32 v0, s2
-; GFX12-NEXT: v_mov_b32_e32 v2, s5
-; GFX12-NEXT: s_clause 0x2
-; GFX12-NEXT: global_store_b128 v1, v[8:11], s[0:1] offset:112
-; GFX12-NEXT: global_store_b128 v1, v[4:7], s[0:1] offset:64
-; GFX12-NEXT: global_store_b128 v1, v[0:3], s[0:1]
+; GFX12-NEXT: v_mov_b32_e32 v2, s2
+; GFX12-NEXT: v_mov_b32_e32 v4, s3
+; GFX12-NEXT: s_clause 0x1
+; GFX12-NEXT: global_store_b128 v3, v[6:9], s[0:1] offset:112
+; GFX12-NEXT: global_store_b128 v3, v[2:5], s[0:1]
; GFX12-NEXT: s_endpgm
;
; GFX1250-LABEL: constant_zextload_v16i1_to_v16i64:
@@ -9706,169 +9706,181 @@ define amdgpu_kernel void @constant_zextload_v64i1_to_v64i64(ptr addrspace(1) %o
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: s_bfe_u32 s4, s3, 0x10014
; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s4
+; GFX1250-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_mov_b32 v2, s4
; GFX1250-NEXT: s_bfe_u32 s5, s3, 0x10015
; GFX1250-NEXT: s_lshr_b32 s4, s3, 31
-; GFX1250-NEXT: v_dual_mov_b32 v2, s5 :: v_dual_mov_b32 v3, v1
+; GFX1250-NEXT: v_dual_mov_b32 v4, s5 :: v_dual_mov_b32 v5, v3
; GFX1250-NEXT: s_bfe_u32 s5, s3, 0x1001e
-; GFX1250-NEXT: s_bfe_u32 s6, s2, 0x10004
-; GFX1250-NEXT: s_and_b32 s7, s2, 1
-; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:416
+; GFX1250-NEXT: s_bfe_u32 s6, s3, 0x10002
+; GFX1250-NEXT: global_store_b128 v3, v[2:5], s[0:1] offset:416
; GFX1250-NEXT: s_wait_xcnt 0x0
-; GFX1250-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v2, s4
+; GFX1250-NEXT: v_dual_mov_b32 v2, s5 :: v_dual_mov_b32 v4, s4
; GFX1250-NEXT: s_bfe_u32 s4, s3, 0x1001d
; GFX1250-NEXT: s_bfe_u32 s5, s3, 0x1001c
-; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:496
+; GFX1250-NEXT: global_store_b128 v3, v[2:5], s[0:1] offset:496
; GFX1250-NEXT: s_wait_xcnt 0x0
-; GFX1250-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v2, s4
+; GFX1250-NEXT: v_dual_mov_b32 v2, s5 :: v_dual_mov_b32 v4, s4
; GFX1250-NEXT: s_bfe_u32 s4, s3, 0x1001b
; GFX1250-NEXT: s_bfe_u32 s5, s3, 0x1001a
-; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:480
+; GFX1250-NEXT: global_store_b128 v3, v[2:5], s[0:1] offset:480
; GFX1250-NEXT: s_wait_xcnt 0x0
-; GFX1250-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v2, s4
+; GFX1250-NEXT: v_dual_mov_b32 v2, s5 :: v_dual_mov_b32 v4, s4
; GFX1250-NEXT: s_bfe_u32 s4, s3, 0x10019
; GFX1250-NEXT: s_bfe_u32 s5, s3, 0x10018
-; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:464
+; GFX1250-NEXT: global_store_b128 v3, v[2:5], s[0:1] offset:464
; GFX1250-NEXT: s_wait_xcnt 0x0
-; GFX1250-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v2, s4
+; GFX1250-NEXT: v_dual_mov_b32 v2, s5 :: v_dual_mov_b32 v4, s4
; GFX1250-NEXT: s_bfe_u32 s4, s3, 0x10017
; GFX1250-NEXT: s_bfe_u32 s5, s3, 0x10016
-; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:448
+; GFX1250-NEXT: global_store_b128 v3, v[2:5], s[0:1] offset:448
; GFX1250-NEXT: s_wait_xcnt 0x0
-; GFX1250-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v2, s4
+; GFX1250-NEXT: v_dual_mov_b32 v2, s5 :: v_dual_mov_b32 v4, s4
; GFX1250-NEXT: s_bfe_u32 s4, s3, 0x10013
; GFX1250-NEXT: s_bfe_u32 s5, s3, 0x10012
-; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:432
+; GFX1250-NEXT: global_store_b128 v3, v[2:5], s[0:1] offset:432
; GFX1250-NEXT: s_wait_xcnt 0x0
-; GFX1250-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v2, s4
+; GFX1250-NEXT: v_dual_mov_b32 v2, s5 :: v_dual_mov_b32 v4, s4
; GFX1250-NEXT: s_bfe_u32 s4, s3, 0x10011
; GFX1250-NEXT: s_bfe_u32 s5, s3, 0x10010
-; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:400
+; GFX1250-NEXT: global_store_b128 v3, v[2:5], s[0:1] offset:400
; GFX1250-NEXT: s_wait_xcnt 0x0
-; GFX1250-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v2, s4
+; GFX1250-NEXT: v_dual_mov_b32 v2, s5 :: v_dual_mov_b32 v4, s4
; GFX1250-NEXT: s_bfe_u32 s4, s3, 0x1000f
; GFX1250-NEXT: s_bfe_u32 s5, s3, 0x1000e
-; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:384
+; GFX1250-NEXT: global_store_b128 v3, v[2:5], s[0:1] offset:384
; GFX1250-NEXT: s_wait_xcnt 0x0
-; GFX1250-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v2, s4
+; GFX1250-NEXT: v_dual_mov_b32 v2, s5 :: v_dual_mov_b32 v4, s4
; GFX1250-NEXT: s_bfe_u32 s4, s3, 0x1000d
; GFX1250-NEXT: s_bfe_u32 s5, s3, 0x1000c
-; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:368
+; GFX1250-NEXT: global_store_b128 v3, v[2:5], s[0:1] offset:368
; GFX1250-NEXT: s_wait_xcnt 0x0
-; GFX1250-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v2, s4
+; GFX1250-NEXT: v_dual_mov_b32 v2, s5 :: v_dual_mov_b32 v4, s4
; GFX1250-NEXT: s_bfe_u32 s4, s3, 0x1000b
; GFX1250-NEXT: s_bfe_u32 s5, s3, 0x1000a
-; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:352
+; GFX1250-NEXT: global_store_b128 v3, v[2:5], s[0:1] offset:352
; GFX1250-NEXT: s_wait_xcnt 0x0
-; GFX1250-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v2, s4
+; GFX1250-NEXT: v_dual_mov_b32 v2, s5 :: v_dual_mov_b32 v4, s4
; GFX1250-NEXT: s_bfe_u32 s4, s3, 0x10009
; GFX1250-NEXT: s_bfe_u32 s5, s3, 0x10008
-; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:336
+; GFX1250-NEXT: global_store_b128 v3, v[2:5], s[0:1] offset:336
; GFX1250-NEXT: s_wait_xcnt 0x0
-; GFX1250-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v2, s4
+; GFX1250-NEXT: v_dual_mov_b32 v2, s5 :: v_dual_mov_b32 v4, s4
; GFX1250-NEXT: s_bfe_u32 s4, s3, 0x10007
; GFX1250-NEXT: s_bfe_u32 s5, s3, 0x10006
-; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:320
+; GFX1250-NEXT: global_store_b128 v3, v[2:5], s[0:1] offset:320
; GFX1250-NEXT: s_wait_xcnt 0x0
-; GFX1250-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v2, s4
+; GFX1250-NEXT: v_dual_mov_b32 v2, s5 :: v_dual_mov_b32 v4, s4
; GFX1250-NEXT: s_bfe_u32 s4, s3, 0x10005
; GFX1250-NEXT: s_bfe_u32 s5, s3, 0x10004
-; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:304
+; GFX1250-NEXT: global_store_b128 v3, v[2:5], s[0:1] offset:304
; GFX1250-NEXT: s_wait_xcnt 0x0
-; GFX1250-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v2, s4
-; GFX1250-NEXT: s_bfe_u32 s4, s3, 0x10003
-; GFX1250-NEXT: s_bfe_u32 s5, s3, 0x10002
-; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:288
+; GFX1250-NEXT: v_dual_mov_b32 v2, s5 :: v_dual_mov_b32 v4, s4
+; GFX1250-NEXT: s_bfe_u32 s5, s3, 0x10003
+; GFX1250-NEXT: s_mov_b32 s4, s3
+; GFX1250-NEXT: s_bfe_u32 s3, s3, 0x10001
+; GFX1250-NEXT: global_store_b128 v3, v[2:5], s[0:1] offset:288
; GFX1250-NEXT: s_wait_xcnt 0x0
-; GFX1250-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v2, s4
-; GFX1250-NEXT: s_lshr_b32 s4, s2, 31
-; GFX1250-NEXT: s_bfe_u32 s5, s2, 0x1001e
-; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:272
+; GFX1250-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v4, s5
+; GFX1250-NEXT: s_and_b64 s[4:5], s[4:5], 1
+; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
+; GFX1250-NEXT: global_store_b128 v3, v[2:5], s[0:1] offset:272
; GFX1250-NEXT: s_wait_xcnt 0x0
-; GFX1250-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v2, s4
-; GFX1250-NEXT: s_bfe_u32 s4, s2, 0x1001d
-; GFX1250-NEXT: s_bfe_u32 s5, s2, 0x1001c
-; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:240
+; GFX1250-NEXT: v_mov_b32_e32 v2, s3
+; GFX1250-NEXT: s_lshr_b32 s3, s2, 31
+; GFX1250-NEXT: s_bfe_u32 s4, s2, 0x1001e
+; GFX1250-NEXT: v_mov_b32_e32 v4, s3
+; GFX1250-NEXT: s_bfe_u32 s3, s2, 0x1001d
+; GFX1250-NEXT: global_store_b128 v3, v[0:3], s[0:1] offset:256
; GFX1250-NEXT: s_wait_xcnt 0x0
-; GFX1250-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v2, s4
-; GFX1250-NEXT: s_bfe_u32 s4, s2, 0x1001b
-; GFX1250-NEXT: s_bfe_u32 s5, s2, 0x1001a
-; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:224
+; GFX1250-NEXT: v_mov_b32_e32 v2, s4
+; GFX1250-NEXT: s_bfe_u32 s4, s2, 0x1001c
+; GFX1250-NEXT: global_store_b128 v3, v[2:5], s[0:1] offset:240
; GFX1250-NEXT: s_wait_xcnt 0x0
-; GFX1250-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v2, s4
-; GFX1250-NEXT: s_bfe_u32 s4, s2, 0x10019
-; GFX1250-NEXT: s_bfe_u32 s5, s2, 0x10018
-; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:208
+; GFX1250-NEXT: v_mov_b32_e32 v4, s3
+; GFX1250-NEXT: s_bfe_u32 s3, s2, 0x1001b
+; GFX1250-NEXT: v_mov_b32_e32 v2, s4
+; GFX1250-NEXT: s_bfe_u32 s4, s2, 0x1001a
+; GFX1250-NEXT: global_store_b128 v3, v[2:5], s[0:1] offset:224
; GFX1250-NEXT: s_wait_xcnt 0x0
-; GFX1250-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v2, s4
-; GFX1250-NEXT: s_bfe_u32 s4, s2, 0x10017
-; GFX1250-NEXT: s_bfe_u32 s5, s2, 0x10016
-; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:192
+; GFX1250-NEXT: v_mov_b32_e32 v4, s3
+; GFX1250-NEXT: s_bfe_u32 s3, s2, 0x10019
+; GFX1250-NEXT: v_mov_b32_e32 v2, s4
+; GFX1250-NEXT: s_bfe_u32 s4, s2, 0x10018
+; GFX1250-NEXT: global_store_b128 v3, v[2:5], s[0:1] offset:208
; GFX1250-NEXT: s_wait_xcnt 0x0
-; GFX1250-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v2, s4
-; GFX1250-NEXT: s_bfe_u32 s4, s2, 0x10014
-; GFX1250-NEXT: s_bfe_u32 s5, s2, 0x10015
-; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:176
+; GFX1250-NEXT: v_mov_b32_e32 v4, s3
+; GFX1250-NEXT: s_bfe_u32 s3, s2, 0x10017
+; GFX1250-NEXT: v_mov_b32_e32 v2, s4
+; GFX1250-NEXT: s_bfe_u32 s4, s2, 0x10016
+; GFX1250-NEXT: global_store_b128 v3, v[2:5], s[0:1] offset:192
; GFX1250-NEXT: s_wait_xcnt 0x0
-; GFX1250-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v2, s5
-; GFX1250-NEXT: s_bfe_u32 s4, s2, 0x10013
-; GFX1250-NEXT: s_bfe_u32 s5, s2, 0x10012
-; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:160
+; GFX1250-NEXT: v_dual_mov_b32 v2, s4 :: v_dual_mov_b32 v4, s3
+; GFX1250-NEXT: s_bfe_u32 s3, s2, 0x10014
+; GFX1250-NEXT: s_bfe_u32 s4, s2, 0x10015
+; GFX1250-NEXT: global_store_b128 v3, v[2:5], s[0:1] offset:176
; GFX1250-NEXT: s_wait_xcnt 0x0
-; GFX1250-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v2, s4
-; GFX1250-NEXT: s_bfe_u32 s4, s2, 0x10011
-; GFX1250-NEXT: s_bfe_u32 s5, s2, 0x10010
-; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:144
+; GFX1250-NEXT: v_dual_mov_b32 v2, s3 :: v_dual_mov_b32 v4, s4
+; GFX1250-NEXT: s_bfe_u32 s3, s2, 0x10013
+; GFX1250-NEXT: s_bfe_u32 s4, s2, 0x10012
+; GFX1250-NEXT: global_store_b128 v3, v[2:5], s[0:1] offset:160
; GFX1250-NEXT: s_wait_xcnt 0x0
-; GFX1250-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v2, s4
-; GFX1250-NEXT: s_bfe_u32 s4, s2, 0x1000f
-; GFX1250-NEXT: s_bfe_u32 s5, s2, 0x1000e
-; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:128
+; GFX1250-NEXT: v_mov_b32_e32 v4, s3
+; GFX1250-NEXT: s_bfe_u32 s3, s2, 0x10011
+; GFX1250-NEXT: v_mov_b32_e32 v2, s4
+; GFX1250-NEXT: s_bfe_u32 s4, s2, 0x10010
+; GFX1250-NEXT: global_store_b128 v3, v[2:5], s[0:1] offset:144
; GFX1250-NEXT: s_wait_xcnt 0x0
-; GFX1250-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v2, s4
-; GFX1250-NEXT: s_bfe_u32 s4, s2, 0x1000d
-; GFX1250-NEXT: s_bfe_u32 s5, s2, 0x1000c
-; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:112
+; GFX1250-NEXT: v_mov_b32_e32 v4, s3
+; GFX1250-NEXT: s_bfe_u32 s3, s2, 0x1000f
+; GFX1250-NEXT: v_mov_b32_e32 v2, s4
+; GFX1250-NEXT: s_bfe_u32 s4, s2, 0x1000e
+; GFX1250-NEXT: global_store_b128 v3, v[2:5], s[0:1] offset:128
; GFX1250-NEXT: s_wait_xcnt 0x0
-; GFX1250-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v2, s4
-; GFX1250-NEXT: s_bfe_u32 s4, s2, 0x1000b
-; GFX1250-NEXT: s_bfe_u32 s5, s2, 0x1000a
-; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:96
+; GFX1250-NEXT: v_mov_b32_e32 v4, s3
+; GFX1250-NEXT: s_bfe_u32 s3, s2, 0x1000d
+; GFX1250-NEXT: v_mov_b32_e32 v2, s4
+; GFX1250-NEXT: s_bfe_u32 s4, s2, 0x1000c
+; GFX1250-NEXT: global_store_b128 v3, v[2:5], s[0:1] offset:112
; GFX1250-NEXT: s_wait_xcnt 0x0
-; GFX1250-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v2, s4
-; GFX1250-NEXT: s_bfe_u32 s4, s2, 0x10009
-; GFX1250-NEXT: s_bfe_u32 s5, s2, 0x10008
-; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:80
+; GFX1250-NEXT: v_mov_b32_e32 v4, s3
+; GFX1250-NEXT: s_bfe_u32 s3, s2, 0x1000b
+; GFX1250-NEXT: v_mov_b32_e32 v2, s4
+; GFX1250-NEXT: s_bfe_u32 s4, s2, 0x1000a
+; GFX1250-NEXT: global_store_b128 v3, v[2:5], s[0:1] offset:96
; GFX1250-NEXT: s_wait_xcnt 0x0
-; GFX1250-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v2, s4
-; GFX1250-NEXT: s_bfe_u32 s4, s2, 0x10007
-; GFX1250-NEXT: s_bfe_u32 s5, s2, 0x10006
-; GFX1250-NEXT: v_mov_b32_e32 v7, v1
-; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:64
+; GFX1250-NEXT: v_mov_b32_e32 v4, s3
+; GFX1250-NEXT: s_bfe_u32 s3, s2, 0x10009
+; GFX1250-NEXT: v_mov_b32_e32 v2, s4
+; GFX1250-NEXT: s_bfe_u32 s4, s2, 0x10008
+; GFX1250-NEXT: global_store_b128 v3, v[2:5], s[0:1] offset:80
; GFX1250-NEXT: s_wait_xcnt 0x0
-; GFX1250-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v2, s4
-; GFX1250-NEXT: s_mov_b32 s4, s3
-; GFX1250-NEXT: s_bfe_u32 s5, s3, 0x10001
+; GFX1250-NEXT: v_mov_b32_e32 v4, s3
+; GFX1250-NEXT: s_bfe_u32 s3, s2, 0x10007
+; GFX1250-NEXT: v_mov_b32_e32 v2, s4
+; GFX1250-NEXT: s_bfe_u32 s4, s2, 0x10006
+; GFX1250-NEXT: global_store_b128 v3, v[2:5], s[0:1] offset:64
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: v_mov_b32_e32 v4, s3
; GFX1250-NEXT: s_bfe_u32 s3, s2, 0x10005
-; GFX1250-NEXT: v_mov_b32_e32 v6, s5
-; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:48
+; GFX1250-NEXT: v_mov_b32_e32 v2, s4
+; GFX1250-NEXT: s_bfe_u32 s4, s2, 0x10004
+; GFX1250-NEXT: global_store_b128 v3, v[2:5], s[0:1] offset:48
; GFX1250-NEXT: s_wait_xcnt 0x0
-; GFX1250-NEXT: v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v4, s3
; GFX1250-NEXT: s_bfe_u32 s3, s2, 0x10003
-; GFX1250-NEXT: s_bfe_u32 s6, s2, 0x10001
-; GFX1250-NEXT: s_bfe_u32 s2, s2, 0x10002
-; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:32
+; GFX1250-NEXT: v_mov_b32_e32 v2, s4
+; GFX1250-NEXT: s_bfe_u32 s4, s2, 0x10002
+; GFX1250-NEXT: global_store_b128 v3, v[2:5], s[0:1] offset:32
; GFX1250-NEXT: s_wait_xcnt 0x0
-; GFX1250-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v2, s3
-; GFX1250-NEXT: s_and_b64 s[2:3], s[4:5], 1
-; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1250-NEXT: v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v5, s3
-; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:16
+; GFX1250-NEXT: v_mov_b32_e32 v4, s3
+; GFX1250-NEXT: s_bfe_u32 s3, s2, 0x10001
+; GFX1250-NEXT: v_mov_b32_e32 v2, s4
+; GFX1250-NEXT: s_and_b32 s2, s2, 1
+; GFX1250-NEXT: global_store_b128 v3, v[2:5], s[0:1] offset:16
; GFX1250-NEXT: s_wait_xcnt 0x0
-; GFX1250-NEXT: v_dual_mov_b32 v0, s7 :: v_dual_mov_b32 v2, s6
-; GFX1250-NEXT: s_clause 0x1
-; GFX1250-NEXT: global_store_b128 v1, v[4:7], s[0:1] offset:256
-; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1]
+; GFX1250-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v4, s3
+; GFX1250-NEXT: global_store_b128 v3, v[2:5], s[0:1]
; GFX1250-NEXT: s_endpgm
%load = load <64 x i1>, ptr addrspace(4) %in
%ext = zext <64 x i1> %load to <64 x i64>
diff --git a/llvm/test/CodeGen/AMDGPU/load-global-i16.ll b/llvm/test/CodeGen/AMDGPU/load-global-i16.ll
index d23c49165ec70..388006281abdc 100644
--- a/llvm/test/CodeGen/AMDGPU/load-global-i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-global-i16.ll
@@ -6359,7 +6359,6 @@ define amdgpu_kernel void @global_sextload_v8i16_to_v8i64(ptr addrspace(1) %out,
; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s4
; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s5
; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v5, v3
; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v6, 16, v2
; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v7, 16, v0
; GCN-NOHSA-SI-NEXT: v_bfe_i32 v0, v0, 0, 16
@@ -6369,7 +6368,7 @@ define amdgpu_kernel void @global_sextload_v8i16_to_v8i64(ptr addrspace(1) %out,
; GCN-NOHSA-SI-NEXT: v_bfe_i32 v8, v1, 0, 16
; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v15, 31, v3
; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v14, 16, v3
-; GCN-NOHSA-SI-NEXT: v_bfe_i32 v12, v5, 0, 16
+; GCN-NOHSA-SI-NEXT: v_bfe_i32 v12, v3, 0, 16
; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v5, 31, v4
; GCN-NOHSA-SI-NEXT: v_bfe_i32 v2, v7, 0, 16
@@ -6404,32 +6403,31 @@ define amdgpu_kernel void @global_sextload_v8i16_to_v8i64(ptr addrspace(1) %out,
; GCN-HSA-NEXT: v_mov_b32_e32 v16, s0
; GCN-HSA-NEXT: s_add_u32 s0, s0, 16
; GCN-HSA-NEXT: s_addc_u32 s1, s1, 0
-; GCN-HSA-NEXT: v_mov_b32_e32 v23, s1
; GCN-HSA-NEXT: v_mov_b32_e32 v21, s3
-; GCN-HSA-NEXT: v_mov_b32_e32 v22, s0
+; GCN-HSA-NEXT: v_mov_b32_e32 v23, s1
; GCN-HSA-NEXT: v_mov_b32_e32 v20, s2
+; GCN-HSA-NEXT: v_mov_b32_e32 v22, s0
; GCN-HSA-NEXT: s_waitcnt vmcnt(0)
-; GCN-HSA-NEXT: v_mov_b32_e32 v8, v3
; GCN-HSA-NEXT: v_lshrrev_b32_e32 v9, 16, v2
; GCN-HSA-NEXT: v_lshrrev_b32_e32 v5, 16, v0
-; GCN-HSA-NEXT: v_bfe_i32 v4, v1, 0, 16
+; GCN-HSA-NEXT: v_bfe_i32 v8, v3, 0, 16
; GCN-HSA-NEXT: v_bfe_i32 v0, v0, 0, 16
-; GCN-HSA-NEXT: v_ashrrev_i32_e32 v7, 31, v1
-; GCN-HSA-NEXT: v_ashrrev_i32_e32 v6, 16, v1
+; GCN-HSA-NEXT: v_bfe_i32 v4, v1, 0, 16
+; GCN-HSA-NEXT: v_ashrrev_i32_e32 v11, 31, v3
+; GCN-HSA-NEXT: v_ashrrev_i32_e32 v10, 16, v3
; GCN-HSA-NEXT: v_bfe_i32 v12, v2, 0, 16
; GCN-HSA-NEXT: v_bfe_i32 v2, v5, 0, 16
-; GCN-HSA-NEXT: v_ashrrev_i32_e32 v5, 31, v4
; GCN-HSA-NEXT: v_bfe_i32 v14, v9, 0, 16
-; GCN-HSA-NEXT: v_bfe_i32 v8, v8, 0, 16
-; GCN-HSA-NEXT: v_ashrrev_i32_e32 v11, 31, v3
-; GCN-HSA-NEXT: v_ashrrev_i32_e32 v10, 16, v3
+; GCN-HSA-NEXT: v_ashrrev_i32_e32 v9, 31, v8
+; GCN-HSA-NEXT: v_ashrrev_i32_e32 v7, 31, v1
+; GCN-HSA-NEXT: v_ashrrev_i32_e32 v6, 16, v1
; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; GCN-HSA-NEXT: v_ashrrev_i32_e32 v13, 31, v12
+; GCN-HSA-NEXT: v_ashrrev_i32_e32 v5, 31, v4
; GCN-HSA-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GCN-HSA-NEXT: v_ashrrev_i32_e32 v15, 31, v14
-; GCN-HSA-NEXT: v_ashrrev_i32_e32 v9, 31, v8
-; GCN-HSA-NEXT: flat_store_dwordx4 v[22:23], v[4:7]
; GCN-HSA-NEXT: flat_store_dwordx4 v[18:19], v[8:11]
+; GCN-HSA-NEXT: flat_store_dwordx4 v[22:23], v[4:7]
; GCN-HSA-NEXT: flat_store_dwordx4 v[20:21], v[12:15]
; GCN-HSA-NEXT: flat_store_dwordx4 v[16:17], v[0:3]
; GCN-HSA-NEXT: s_endpgm
@@ -6448,12 +6446,11 @@ define amdgpu_kernel void @global_sextload_v8i16_to_v8i64(ptr addrspace(1) %out,
; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4
; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5
; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0)
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v11, v3
+; GCN-NOHSA-VI-NEXT: v_bfe_i32 v12, v3, 0, 16
; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v6, 16, v0
; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v7, 16, v1
; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v10, 16, v2
-; GCN-NOHSA-VI-NEXT: v_bfe_i32 v12, v11, 0, 16
; GCN-NOHSA-VI-NEXT: v_bfe_i32 v14, v3, 0, 16
; GCN-NOHSA-VI-NEXT: v_bfe_i32 v4, v0, 0, 16
; GCN-NOHSA-VI-NEXT: v_bfe_i32 v0, v1, 0, 16
@@ -6977,27 +6974,25 @@ define amdgpu_kernel void @global_sextload_v16i16_to_v16i64(ptr addrspace(1) %ou
; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s4
; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s5
; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(1)
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v9, v3
-; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v13, 16, v0
+; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v9, 16, v0
; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v12, v7
-; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v17, 16, v6
-; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v18, 16, v4
+; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v10, 16, v6
+; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v11, 16, v4
; GCN-NOHSA-SI-NEXT: v_bfe_i32 v4, v4, 0, 16
-; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v11, 31, v5
-; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v10, 16, v5
-; GCN-NOHSA-SI-NEXT: v_bfe_i32 v8, v5, 0, 16
-; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v15, 31, v7
-; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v14, 16, v7
-; GCN-NOHSA-SI-NEXT: v_bfe_i32 v16, v6, 0, 16
-; GCN-NOHSA-SI-NEXT: v_bfe_i32 v12, v12, 0, 16
+; GCN-NOHSA-SI-NEXT: v_bfe_i32 v8, v6, 0, 16
+; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v15, 31, v5
+; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v14, 16, v5
+; GCN-NOHSA-SI-NEXT: v_bfe_i32 v12, v5, 0, 16
+; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v19, 31, v7
+; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v18, 16, v7
+; GCN-NOHSA-SI-NEXT: v_bfe_i32 v16, v7, 0, 16
; GCN-NOHSA-SI-NEXT: v_bfe_i32 v20, v0, 0, 16
-; GCN-NOHSA-SI-NEXT: v_bfe_i32 v6, v18, 0, 16
-; GCN-NOHSA-SI-NEXT: v_bfe_i32 v18, v17, 0, 16
-; GCN-NOHSA-SI-NEXT: v_bfe_i32 v22, v13, 0, 16
-; GCN-NOHSA-SI-NEXT: v_bfe_i32 v23, v9, 0, 16
+; GCN-NOHSA-SI-NEXT: v_bfe_i32 v6, v11, 0, 16
+; GCN-NOHSA-SI-NEXT: v_bfe_i32 v10, v10, 0, 16
+; GCN-NOHSA-SI-NEXT: v_bfe_i32 v22, v9, 0, 16
; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v26, 31, v3
; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v25, 16, v3
+; GCN-NOHSA-SI-NEXT: v_bfe_i32 v23, v3, 0, 16
; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v24, 31, v23
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[23:26], off, s[0:3], 0 offset:112
; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v27, 31, v1
@@ -7008,22 +7003,22 @@ define amdgpu_kernel void @global_sextload_v16i16_to_v16i64(ptr addrspace(1) %ou
; GCN-NOHSA-SI-NEXT: v_bfe_i32 v0, v2, 0, 16
; GCN-NOHSA-SI-NEXT: v_bfe_i32 v2, v1, 0, 16
; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v5, 31, v4
-; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v17, 31, v16
+; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v9, 31, v8
; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v21, 31, v20
; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v1, 31, v0
-; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v9, 31, v8
; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v13, 31, v12
+; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v17, 31, v16
; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v25, 31, v24
; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v7, 31, v6
-; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v19, 31, v18
+; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v11, 31, v10
; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v23, 31, v22
; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:80
-; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:48
-; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:16
+; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:48
+; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:16
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:64
-; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:32
+; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:32
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0
; GCN-NOHSA-SI-NEXT: s_endpgm
;
@@ -7067,57 +7062,55 @@ define amdgpu_kernel void @global_sextload_v16i16_to_v16i64(ptr addrspace(1) %ou
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
; GCN-HSA-NEXT: v_mov_b32_e32 v12, s0
; GCN-HSA-NEXT: s_add_u32 s0, s0, 64
-; GCN-HSA-NEXT: v_mov_b32_e32 v25, s3
; GCN-HSA-NEXT: s_addc_u32 s1, s1, 0
-; GCN-HSA-NEXT: v_mov_b32_e32 v24, s2
+; GCN-HSA-NEXT: v_mov_b32_e32 v25, s3
; GCN-HSA-NEXT: v_mov_b32_e32 v27, s1
+; GCN-HSA-NEXT: v_mov_b32_e32 v24, s2
; GCN-HSA-NEXT: v_mov_b32_e32 v26, s0
; GCN-HSA-NEXT: s_waitcnt vmcnt(1)
-; GCN-HSA-NEXT: v_bfe_i32 v8, v5, 0, 16
-; GCN-HSA-NEXT: v_ashrrev_i32_e32 v11, 31, v5
-; GCN-HSA-NEXT: v_ashrrev_i32_e32 v10, 16, v5
-; GCN-HSA-NEXT: v_ashrrev_i32_e32 v9, 31, v8
-; GCN-HSA-NEXT: v_mov_b32_e32 v5, v7
-; GCN-HSA-NEXT: flat_store_dwordx4 v[18:19], v[8:11]
-; GCN-HSA-NEXT: v_lshrrev_b32_e32 v18, 16, v6
-; GCN-HSA-NEXT: v_bfe_i32 v8, v5, 0, 16
-; GCN-HSA-NEXT: v_ashrrev_i32_e32 v11, 31, v7
-; GCN-HSA-NEXT: v_ashrrev_i32_e32 v10, 16, v7
-; GCN-HSA-NEXT: v_ashrrev_i32_e32 v9, 31, v8
-; GCN-HSA-NEXT: v_lshrrev_b32_e32 v19, 16, v4
-; GCN-HSA-NEXT: flat_store_dwordx4 v[14:15], v[8:11]
-; GCN-HSA-NEXT: v_bfe_i32 v7, v6, 0, 16
-; GCN-HSA-NEXT: v_bfe_i32 v9, v18, 0, 16
+; GCN-HSA-NEXT: v_ashrrev_i32_e32 v10, 31, v7
+; GCN-HSA-NEXT: v_ashrrev_i32_e32 v9, 16, v7
+; GCN-HSA-NEXT: v_bfe_i32 v7, v7, 0, 16
+; GCN-HSA-NEXT: v_ashrrev_i32_e32 v8, 31, v7
+; GCN-HSA-NEXT: flat_store_dwordx4 v[14:15], v[7:10]
+; GCN-HSA-NEXT: v_lshrrev_b32_e32 v11, 16, v6
+; GCN-HSA-NEXT: v_bfe_i32 v7, v5, 0, 16
+; GCN-HSA-NEXT: v_ashrrev_i32_e32 v10, 31, v5
+; GCN-HSA-NEXT: v_ashrrev_i32_e32 v9, 16, v5
+; GCN-HSA-NEXT: v_ashrrev_i32_e32 v8, 31, v7
+; GCN-HSA-NEXT: v_lshrrev_b32_e32 v14, 16, v4
+; GCN-HSA-NEXT: flat_store_dwordx4 v[18:19], v[7:10]
; GCN-HSA-NEXT: v_bfe_i32 v4, v4, 0, 16
+; GCN-HSA-NEXT: v_bfe_i32 v7, v6, 0, 16
+; GCN-HSA-NEXT: v_bfe_i32 v9, v11, 0, 16
; GCN-HSA-NEXT: v_ashrrev_i32_e32 v8, 31, v7
-; GCN-HSA-NEXT: v_bfe_i32 v6, v19, 0, 16
+; GCN-HSA-NEXT: v_bfe_i32 v6, v14, 0, 16
; GCN-HSA-NEXT: v_ashrrev_i32_e32 v10, 31, v9
; GCN-HSA-NEXT: v_ashrrev_i32_e32 v5, 31, v4
; GCN-HSA-NEXT: flat_store_dwordx4 v[16:17], v[7:10]
; GCN-HSA-NEXT: s_waitcnt vmcnt(3)
-; GCN-HSA-NEXT: v_mov_b32_e32 v15, v3
-; GCN-HSA-NEXT: v_ashrrev_i32_e32 v7, 31, v6
; GCN-HSA-NEXT: v_lshrrev_b32_e32 v14, 16, v2
-; GCN-HSA-NEXT: v_lshrrev_b32_e32 v18, 16, v0
+; GCN-HSA-NEXT: v_ashrrev_i32_e32 v7, 31, v6
+; GCN-HSA-NEXT: v_lshrrev_b32_e32 v15, 16, v0
; GCN-HSA-NEXT: flat_store_dwordx4 v[12:13], v[4:7]
; GCN-HSA-NEXT: v_bfe_i32 v0, v0, 0, 16
-; GCN-HSA-NEXT: v_bfe_i32 v4, v1, 0, 16
-; GCN-HSA-NEXT: v_ashrrev_i32_e32 v7, 31, v1
-; GCN-HSA-NEXT: v_ashrrev_i32_e32 v6, 16, v1
+; GCN-HSA-NEXT: v_bfe_i32 v4, v3, 0, 16
+; GCN-HSA-NEXT: v_bfe_i32 v8, v1, 0, 16
+; GCN-HSA-NEXT: v_ashrrev_i32_e32 v7, 31, v3
+; GCN-HSA-NEXT: v_ashrrev_i32_e32 v6, 16, v3
; GCN-HSA-NEXT: v_bfe_i32 v12, v2, 0, 16
-; GCN-HSA-NEXT: v_bfe_i32 v2, v18, 0, 16
-; GCN-HSA-NEXT: v_ashrrev_i32_e32 v5, 31, v4
+; GCN-HSA-NEXT: v_bfe_i32 v2, v15, 0, 16
; GCN-HSA-NEXT: v_bfe_i32 v14, v14, 0, 16
-; GCN-HSA-NEXT: v_bfe_i32 v8, v15, 0, 16
-; GCN-HSA-NEXT: v_ashrrev_i32_e32 v11, 31, v3
-; GCN-HSA-NEXT: v_ashrrev_i32_e32 v10, 16, v3
+; GCN-HSA-NEXT: v_ashrrev_i32_e32 v5, 31, v4
+; GCN-HSA-NEXT: v_ashrrev_i32_e32 v11, 31, v1
+; GCN-HSA-NEXT: v_ashrrev_i32_e32 v10, 16, v1
; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; GCN-HSA-NEXT: v_ashrrev_i32_e32 v13, 31, v12
+; GCN-HSA-NEXT: v_ashrrev_i32_e32 v9, 31, v8
; GCN-HSA-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GCN-HSA-NEXT: v_ashrrev_i32_e32 v15, 31, v14
-; GCN-HSA-NEXT: v_ashrrev_i32_e32 v9, 31, v8
-; GCN-HSA-NEXT: flat_store_dwordx4 v[24:25], v[4:7]
-; GCN-HSA-NEXT: flat_store_dwordx4 v[20:21], v[8:11]
+; GCN-HSA-NEXT: flat_store_dwordx4 v[20:21], v[4:7]
+; GCN-HSA-NEXT: flat_store_dwordx4 v[24:25], v[8:11]
; GCN-HSA-NEXT: flat_store_dwordx4 v[22:23], v[12:15]
; GCN-HSA-NEXT: flat_store_dwordx4 v[26:27], v[0:3]
; GCN-HSA-NEXT: s_endpgm
@@ -7132,62 +7125,60 @@ define amdgpu_kernel void @global_sextload_v16i16_to_v16i64(ptr addrspace(1) %ou
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s6
; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7
-; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0
-; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16
+; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[1:4], off, s[8:11], 0
+; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[5:8], off, s[8:11], 0 offset:16
; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4
; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5
; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(1)
-; GCN-NOHSA-VI-NEXT: v_bfe_i32 v8, v0, 0, 16
-; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v9, 16, v0
; GCN-NOHSA-VI-NEXT: v_bfe_i32 v0, v1, 0, 16
-; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
-; GCN-NOHSA-VI-NEXT: v_bfe_i32 v12, v2, 0, 16
-; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v11, 16, v2
-; GCN-NOHSA-VI-NEXT: v_bfe_i32 v2, v1, 0, 16
+; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v10, 16, v1
; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0)
-; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v1, 16, v5
-; GCN-NOHSA-VI-NEXT: v_bfe_i32 v17, v1, 0, 16
-; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v1, 16, v4
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v13, v3
-; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v3, 16, v3
-; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v20, 16, v6
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v22, v7
-; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v7, 16, v7
-; GCN-NOHSA-VI-NEXT: v_bfe_i32 v15, v5, 0, 16
-; GCN-NOHSA-VI-NEXT: v_bfe_i32 v27, v6, 0, 16
-; GCN-NOHSA-VI-NEXT: v_bfe_i32 v4, v4, 0, 16
+; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v1, 16, v6
+; GCN-NOHSA-VI-NEXT: v_bfe_i32 v13, v3, 0, 16
+; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v12, 16, v3
+; GCN-NOHSA-VI-NEXT: v_bfe_i32 v3, v1, 0, 16
+; GCN-NOHSA-VI-NEXT: v_bfe_i32 v1, v6, 0, 16
+; GCN-NOHSA-VI-NEXT: v_bfe_i32 v9, v2, 0, 16
+; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v11, 16, v2
+; GCN-NOHSA-VI-NEXT: v_bfe_i32 v17, v4, 0, 16
+; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v14, 16, v4
+; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v2, 31, v1
+; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v4, 31, v3
+; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[1:4], off, s[0:3], 0 offset:80
+; GCN-NOHSA-VI-NEXT: v_bfe_i32 v25, v5, 0, 16
+; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v1, 16, v7
; GCN-NOHSA-VI-NEXT: v_bfe_i32 v6, v1, 0, 16
-; GCN-NOHSA-VI-NEXT: v_bfe_i32 v10, v9, 0, 16
-; GCN-NOHSA-VI-NEXT: v_bfe_i32 v14, v11, 0, 16
-; GCN-NOHSA-VI-NEXT: v_bfe_i32 v19, v13, 0, 16
-; GCN-NOHSA-VI-NEXT: v_bfe_i32 v21, v3, 0, 16
-; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v16, 31, v15
+; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v1, 16, v5
+; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v3, 16, v8
+; GCN-NOHSA-VI-NEXT: v_bfe_i32 v27, v1, 0, 16
+; GCN-NOHSA-VI-NEXT: v_bfe_i32 v4, v7, 0, 16
+; GCN-NOHSA-VI-NEXT: v_bfe_i32 v2, v10, 0, 16
+; GCN-NOHSA-VI-NEXT: v_bfe_i32 v11, v11, 0, 16
+; GCN-NOHSA-VI-NEXT: v_bfe_i32 v15, v12, 0, 16
+; GCN-NOHSA-VI-NEXT: v_bfe_i32 v19, v14, 0, 16
+; GCN-NOHSA-VI-NEXT: v_bfe_i32 v23, v3, 0, 16
+; GCN-NOHSA-VI-NEXT: v_bfe_i32 v21, v8, 0, 16
+; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v26, 31, v25
+; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v28, 31, v27
+; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v1, 31, v0
+; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v10, 31, v9
+; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v14, 31, v13
; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v18, 31, v17
-; GCN-NOHSA-VI-NEXT: v_bfe_i32 v23, v22, 0, 16
-; GCN-NOHSA-VI-NEXT: v_bfe_i32 v25, v7, 0, 16
-; GCN-NOHSA-VI-NEXT: v_bfe_i32 v29, v20, 0, 16
; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v5, 31, v4
-; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v7, 31, v6
-; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[15:18], off, s[0:3], 0 offset:80
-; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v9, 31, v8
-; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v1, 31, v0
-; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v13, 31, v12
-; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v28, 31, v27
-; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v11, 31, v10
+; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v22, 31, v21
; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v3, 31, v2
-; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v15, 31, v14
+; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v12, 31, v11
+; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v16, 31, v15
; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v20, 31, v19
-; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v22, 31, v21
-; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v30, 31, v29
+; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v7, 31, v6
; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v24, 31, v23
-; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v26, 31, v25
-; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:64
-; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[23:26], off, s[0:3], 0 offset:112
-; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[27:30], off, s[0:3], 0 offset:96
-; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[19:22], off, s[0:3], 0 offset:48
-; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:32
-; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
-; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0
+; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[25:28], off, s[0:3], 0 offset:64
+; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[21:24], off, s[0:3], 0 offset:112
+; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:96
+; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[17:20], off, s[0:3], 0 offset:48
+; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[13:16], off, s[0:3], 0 offset:32
+; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[9:12], off, s[0:3], 0 offset:16
+; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; GCN-NOHSA-VI-NEXT: s_endpgm
;
; EG-LABEL: global_sextload_v16i16_to_v16i64:
@@ -8116,19 +8107,17 @@ define amdgpu_kernel void @global_sextload_v32i16_to_v32i64(ptr addrspace(1) %ou
; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16
; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(3)
; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v20, 16, v14
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v23, v15
; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v21, 16, v12
; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(2)
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v27, v3
; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v16, 16, v0
; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v17, 16, v6
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v22, v7
; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v18, 16, v4
; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v19, 16, v10
-; GCN-NOHSA-SI-NEXT: v_bfe_i32 v23, v23, 0, 16
+; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v22, 16, v8
; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v26, 31, v15
; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v25, 16, v15
+; GCN-NOHSA-SI-NEXT: v_bfe_i32 v23, v15, 0, 16
; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v24, 31, v23
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[23:26], off, s[0:3], 0 offset:240
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
@@ -8137,11 +8126,10 @@ define amdgpu_kernel void @global_sextload_v32i16_to_v32i64(ptr addrspace(1) %ou
; GCN-NOHSA-SI-NEXT: v_bfe_i32 v23, v13, 0, 16
; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v24, 31, v23
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[23:26], off, s[0:3], 0 offset:208
-; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v13, v11
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT: v_bfe_i32 v23, v27, 0, 16
; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v26, 31, v3
; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v25, 16, v3
+; GCN-NOHSA-SI-NEXT: v_bfe_i32 v23, v3, 0, 16
; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v24, 31, v23
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[23:26], off, s[0:3], 0 offset:176
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
@@ -8150,35 +8138,34 @@ define amdgpu_kernel void @global_sextload_v32i16_to_v32i64(ptr addrspace(1) %ou
; GCN-NOHSA-SI-NEXT: v_bfe_i32 v23, v1, 0, 16
; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v24, 31, v23
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[23:26], off, s[0:3], 0 offset:144
-; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v1, 16, v8
-; GCN-NOHSA-SI-NEXT: v_bfe_i32 v22, v22, 0, 16
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v25, 31, v7
-; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v24, 16, v7
-; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v23, 31, v22
-; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[22:25], off, s[0:3], 0 offset:112
+; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v26, 31, v7
+; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v25, 16, v7
+; GCN-NOHSA-SI-NEXT: v_bfe_i32 v23, v7, 0, 16
+; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v24, 31, v23
+; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[23:26], off, s[0:3], 0 offset:112
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v25, 31, v5
-; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v24, 16, v5
-; GCN-NOHSA-SI-NEXT: v_bfe_i32 v22, v5, 0, 16
-; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v23, 31, v22
-; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[22:25], off, s[0:3], 0 offset:80
+; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v26, 31, v5
+; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v25, 16, v5
+; GCN-NOHSA-SI-NEXT: v_bfe_i32 v23, v5, 0, 16
+; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v24, 31, v23
+; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[23:26], off, s[0:3], 0 offset:80
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v25, 31, v11
-; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v24, 16, v11
-; GCN-NOHSA-SI-NEXT: v_bfe_i32 v22, v13, 0, 16
-; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v23, 31, v22
-; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[22:25], off, s[0:3], 0 offset:48
+; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v26, 31, v11
+; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v25, 16, v11
+; GCN-NOHSA-SI-NEXT: v_bfe_i32 v23, v11, 0, 16
+; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v24, 31, v23
+; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[23:26], off, s[0:3], 0 offset:48
; GCN-NOHSA-SI-NEXT: v_bfe_i32 v11, v12, 0, 16
; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0)
-; GCN-NOHSA-SI-NEXT: v_bfe_i32 v22, v14, 0, 16
+; GCN-NOHSA-SI-NEXT: v_bfe_i32 v23, v14, 0, 16
; GCN-NOHSA-SI-NEXT: v_bfe_i32 v13, v21, 0, 16
-; GCN-NOHSA-SI-NEXT: v_bfe_i32 v24, v20, 0, 16
-; GCN-NOHSA-SI-NEXT: v_bfe_i32 v3, v1, 0, 16
+; GCN-NOHSA-SI-NEXT: v_bfe_i32 v25, v20, 0, 16
+; GCN-NOHSA-SI-NEXT: v_bfe_i32 v3, v22, 0, 16
; GCN-NOHSA-SI-NEXT: v_bfe_i32 v1, v8, 0, 16
-; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v23, 31, v22
-; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v25, 31, v24
-; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[22:25], off, s[0:3], 0 offset:224
+; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v24, 31, v23
+; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v26, 31, v25
+; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[23:26], off, s[0:3], 0 offset:224
; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v8, 31, v9
; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v7, 16, v9
; GCN-NOHSA-SI-NEXT: v_bfe_i32 v5, v9, 0, 16
@@ -8237,8 +8224,8 @@ define amdgpu_kernel void @global_sextload_v32i16_to_v32i64(ptr addrspace(1) %ou
; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0
; GCN-HSA-NEXT: s_add_u32 s2, s2, 16
; GCN-HSA-NEXT: s_addc_u32 s3, s3, 0
-; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
; GCN-HSA-NEXT: flat_load_dwordx4 v[8:11], v[0:1]
+; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
; GCN-HSA-NEXT: flat_load_dwordx4 v[4:7], v[4:5]
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4
@@ -8246,161 +8233,157 @@ define amdgpu_kernel void @global_sextload_v32i16_to_v32i64(ptr addrspace(1) %ou
; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1]
; GCN-HSA-NEXT: s_add_u32 s2, s0, 32
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
-; GCN-HSA-NEXT: v_mov_b32_e32 v23, s3
-; GCN-HSA-NEXT: v_mov_b32_e32 v22, s2
+; GCN-HSA-NEXT: v_mov_b32_e32 v22, s3
+; GCN-HSA-NEXT: v_mov_b32_e32 v21, s2
; GCN-HSA-NEXT: s_add_u32 s2, s0, 48
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
-; GCN-HSA-NEXT: v_mov_b32_e32 v25, s3
-; GCN-HSA-NEXT: v_mov_b32_e32 v24, s2
+; GCN-HSA-NEXT: v_mov_b32_e32 v24, s3
+; GCN-HSA-NEXT: v_mov_b32_e32 v23, s2
; GCN-HSA-NEXT: s_add_u32 s2, s0, 16
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
-; GCN-HSA-NEXT: v_mov_b32_e32 v27, s3
-; GCN-HSA-NEXT: v_mov_b32_e32 v26, s2
+; GCN-HSA-NEXT: v_mov_b32_e32 v26, s3
+; GCN-HSA-NEXT: v_mov_b32_e32 v25, s2
; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xe0
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
-; GCN-HSA-NEXT: v_mov_b32_e32 v21, s1
-; GCN-HSA-NEXT: v_mov_b32_e32 v20, s0
+; GCN-HSA-NEXT: v_mov_b32_e32 v20, s1
+; GCN-HSA-NEXT: v_mov_b32_e32 v19, s0
; GCN-HSA-NEXT: s_waitcnt vmcnt(3)
-; GCN-HSA-NEXT: v_bfe_i32 v16, v13, 0, 16
-; GCN-HSA-NEXT: v_ashrrev_i32_e32 v19, 31, v13
-; GCN-HSA-NEXT: v_ashrrev_i32_e32 v18, 16, v13
-; GCN-HSA-NEXT: v_ashrrev_i32_e32 v17, 31, v16
-; GCN-HSA-NEXT: flat_store_dwordx4 v[26:27], v[16:19]
-; GCN-HSA-NEXT: v_mov_b32_e32 v27, s3
-; GCN-HSA-NEXT: v_lshrrev_b32_e32 v13, 16, v14
-; GCN-HSA-NEXT: v_mov_b32_e32 v26, s2
+; GCN-HSA-NEXT: v_ashrrev_i32_e32 v18, 31, v15
+; GCN-HSA-NEXT: v_ashrrev_i32_e32 v17, 16, v15
+; GCN-HSA-NEXT: v_bfe_i32 v15, v15, 0, 16
+; GCN-HSA-NEXT: v_ashrrev_i32_e32 v16, 31, v15
+; GCN-HSA-NEXT: flat_store_dwordx4 v[23:24], v[15:18]
+; GCN-HSA-NEXT: v_mov_b32_e32 v24, s3
+; GCN-HSA-NEXT: v_bfe_i32 v15, v13, 0, 16
+; GCN-HSA-NEXT: v_mov_b32_e32 v23, s2
; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xf0
-; GCN-HSA-NEXT: v_bfe_i32 v18, v13, 0, 16
-; GCN-HSA-NEXT: v_bfe_i32 v16, v14, 0, 16
+; GCN-HSA-NEXT: v_ashrrev_i32_e32 v18, 31, v13
+; GCN-HSA-NEXT: v_ashrrev_i32_e32 v17, 16, v13
+; GCN-HSA-NEXT: v_ashrrev_i32_e32 v16, 31, v15
+; GCN-HSA-NEXT: v_lshrrev_b32_e32 v13, 16, v14
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
-; GCN-HSA-NEXT: v_ashrrev_i32_e32 v17, 31, v16
-; GCN-HSA-NEXT: v_ashrrev_i32_e32 v19, 31, v18
-; GCN-HSA-NEXT: flat_store_dwordx4 v[22:23], v[16:19]
-; GCN-HSA-NEXT: v_mov_b32_e32 v23, s3
-; GCN-HSA-NEXT: v_mov_b32_e32 v13, v15
-; GCN-HSA-NEXT: v_mov_b32_e32 v22, s2
+; GCN-HSA-NEXT: flat_store_dwordx4 v[25:26], v[15:18]
+; GCN-HSA-NEXT: s_waitcnt vmcnt(4)
+; GCN-HSA-NEXT: v_lshrrev_b32_e32 v27, 16, v10
+; GCN-HSA-NEXT: v_bfe_i32 v15, v13, 0, 16
+; GCN-HSA-NEXT: v_bfe_i32 v13, v14, 0, 16
+; GCN-HSA-NEXT: v_mov_b32_e32 v18, s3
+; GCN-HSA-NEXT: v_ashrrev_i32_e32 v14, 31, v13
+; GCN-HSA-NEXT: v_ashrrev_i32_e32 v16, 31, v15
+; GCN-HSA-NEXT: v_mov_b32_e32 v17, s2
; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xc0
-; GCN-HSA-NEXT: v_bfe_i32 v13, v13, 0, 16
+; GCN-HSA-NEXT: flat_store_dwordx4 v[21:22], v[13:16]
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
-; GCN-HSA-NEXT: v_ashrrev_i32_e32 v16, 31, v15
-; GCN-HSA-NEXT: v_ashrrev_i32_e32 v15, 16, v15
-; GCN-HSA-NEXT: v_ashrrev_i32_e32 v14, 31, v13
-; GCN-HSA-NEXT: flat_store_dwordx4 v[24:25], v[13:16]
-; GCN-HSA-NEXT: v_mov_b32_e32 v25, s3
; GCN-HSA-NEXT: v_lshrrev_b32_e32 v14, 16, v12
-; GCN-HSA-NEXT: v_mov_b32_e32 v24, s2
-; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xd0
; GCN-HSA-NEXT: v_bfe_i32 v12, v12, 0, 16
; GCN-HSA-NEXT: v_bfe_i32 v14, v14, 0, 16
-; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
+; GCN-HSA-NEXT: v_mov_b32_e32 v22, s3
; GCN-HSA-NEXT: v_ashrrev_i32_e32 v13, 31, v12
; GCN-HSA-NEXT: v_ashrrev_i32_e32 v15, 31, v14
-; GCN-HSA-NEXT: v_mov_b32_e32 v17, s3
-; GCN-HSA-NEXT: flat_store_dwordx4 v[20:21], v[12:15]
-; GCN-HSA-NEXT: v_mov_b32_e32 v16, s2
-; GCN-HSA-NEXT: s_waitcnt vmcnt(6)
-; GCN-HSA-NEXT: v_bfe_i32 v12, v9, 0, 16
-; GCN-HSA-NEXT: v_lshrrev_b32_e32 v28, 16, v10
-; GCN-HSA-NEXT: v_mov_b32_e32 v29, v11
-; GCN-HSA-NEXT: v_lshrrev_b32_e32 v18, 16, v8
-; GCN-HSA-NEXT: v_ashrrev_i32_e32 v15, 31, v9
-; GCN-HSA-NEXT: v_ashrrev_i32_e32 v14, 16, v9
-; GCN-HSA-NEXT: v_ashrrev_i32_e32 v13, 31, v12
-; GCN-HSA-NEXT: flat_store_dwordx4 v[16:17], v[12:15]
-; GCN-HSA-NEXT: v_bfe_i32 v16, v29, 0, 16
-; GCN-HSA-NEXT: v_bfe_i32 v12, v8, 0, 16
-; GCN-HSA-NEXT: v_bfe_i32 v14, v18, 0, 16
-; GCN-HSA-NEXT: v_bfe_i32 v8, v10, 0, 16
-; GCN-HSA-NEXT: v_bfe_i32 v10, v28, 0, 16
+; GCN-HSA-NEXT: v_mov_b32_e32 v21, s2
+; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xd0
+; GCN-HSA-NEXT: flat_store_dwordx4 v[19:20], v[12:15]
+; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
+; GCN-HSA-NEXT: v_ashrrev_i32_e32 v14, 31, v11
+; GCN-HSA-NEXT: v_ashrrev_i32_e32 v13, 16, v11
+; GCN-HSA-NEXT: v_bfe_i32 v11, v11, 0, 16
+; GCN-HSA-NEXT: v_ashrrev_i32_e32 v12, 31, v11
+; GCN-HSA-NEXT: v_mov_b32_e32 v26, s3
+; GCN-HSA-NEXT: v_lshrrev_b32_e32 v28, 16, v8
+; GCN-HSA-NEXT: flat_store_dwordx4 v[17:18], v[11:14]
+; GCN-HSA-NEXT: v_mov_b32_e32 v25, s2
+; GCN-HSA-NEXT: v_bfe_i32 v11, v9, 0, 16
; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xa0
-; GCN-HSA-NEXT: v_ashrrev_i32_e32 v19, 31, v11
-; GCN-HSA-NEXT: v_ashrrev_i32_e32 v18, 16, v11
-; GCN-HSA-NEXT: v_ashrrev_i32_e32 v9, 31, v8
+; GCN-HSA-NEXT: v_ashrrev_i32_e32 v14, 31, v9
+; GCN-HSA-NEXT: v_ashrrev_i32_e32 v13, 16, v9
+; GCN-HSA-NEXT: v_bfe_i32 v15, v10, 0, 16
+; GCN-HSA-NEXT: v_ashrrev_i32_e32 v12, 31, v11
+; GCN-HSA-NEXT: v_bfe_i32 v10, v28, 0, 16
+; GCN-HSA-NEXT: v_bfe_i32 v17, v27, 0, 16
+; GCN-HSA-NEXT: v_bfe_i32 v8, v8, 0, 16
+; GCN-HSA-NEXT: v_ashrrev_i32_e32 v16, 31, v15
; GCN-HSA-NEXT: s_waitcnt vmcnt(5)
-; GCN-HSA-NEXT: v_lshrrev_b32_e32 v20, 16, v2
+; GCN-HSA-NEXT: v_lshrrev_b32_e32 v19, 16, v2
+; GCN-HSA-NEXT: flat_store_dwordx4 v[25:26], v[11:14]
+; GCN-HSA-NEXT: v_ashrrev_i32_e32 v18, 31, v17
; GCN-HSA-NEXT: v_ashrrev_i32_e32 v11, 31, v10
-; GCN-HSA-NEXT: v_ashrrev_i32_e32 v15, 31, v14
-; GCN-HSA-NEXT: v_ashrrev_i32_e32 v17, 31, v16
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
-; GCN-HSA-NEXT: v_ashrrev_i32_e32 v13, 31, v12
-; GCN-HSA-NEXT: flat_store_dwordx4 v[26:27], v[8:11]
-; GCN-HSA-NEXT: flat_store_dwordx4 v[22:23], v[16:19]
-; GCN-HSA-NEXT: flat_store_dwordx4 v[24:25], v[12:15]
-; GCN-HSA-NEXT: v_bfe_i32 v8, v2, 0, 16
-; GCN-HSA-NEXT: v_bfe_i32 v10, v20, 0, 16
-; GCN-HSA-NEXT: v_mov_b32_e32 v15, s3
; GCN-HSA-NEXT: v_ashrrev_i32_e32 v9, 31, v8
-; GCN-HSA-NEXT: v_ashrrev_i32_e32 v11, 31, v10
-; GCN-HSA-NEXT: v_mov_b32_e32 v14, s2
+; GCN-HSA-NEXT: v_bfe_i32 v12, v2, 0, 16
+; GCN-HSA-NEXT: v_bfe_i32 v14, v19, 0, 16
+; GCN-HSA-NEXT: flat_store_dwordx4 v[23:24], v[15:18]
+; GCN-HSA-NEXT: flat_store_dwordx4 v[21:22], v[8:11]
+; GCN-HSA-NEXT: v_ashrrev_i32_e32 v13, 31, v12
+; GCN-HSA-NEXT: v_mov_b32_e32 v11, s3
+; GCN-HSA-NEXT: v_ashrrev_i32_e32 v15, 31, v14
+; GCN-HSA-NEXT: v_mov_b32_e32 v10, s2
; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xb0
-; GCN-HSA-NEXT: v_mov_b32_e32 v21, v3
-; GCN-HSA-NEXT: flat_store_dwordx4 v[14:15], v[8:11]
+; GCN-HSA-NEXT: flat_store_dwordx4 v[10:11], v[12:15]
+; GCN-HSA-NEXT: v_bfe_i32 v10, v3, 0, 16
+; GCN-HSA-NEXT: v_ashrrev_i32_e32 v13, 31, v3
+; GCN-HSA-NEXT: v_ashrrev_i32_e32 v12, 16, v3
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
-; GCN-HSA-NEXT: v_ashrrev_i32_e32 v11, 31, v3
-; GCN-HSA-NEXT: v_ashrrev_i32_e32 v10, 16, v3
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2
-; GCN-HSA-NEXT: v_bfe_i32 v8, v21, 0, 16
+; GCN-HSA-NEXT: v_lshrrev_b32_e32 v27, 16, v0
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s3
; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x80
-; GCN-HSA-NEXT: v_ashrrev_i32_e32 v9, 31, v8
-; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
-; GCN-HSA-NEXT: v_lshrrev_b32_e32 v26, 16, v0
-; GCN-HSA-NEXT: v_lshrrev_b32_e32 v18, 16, v6
-; GCN-HSA-NEXT: v_lshrrev_b32_e32 v27, 16, v4
-; GCN-HSA-NEXT: v_bfe_i32 v12, v4, 0, 16
-; GCN-HSA-NEXT: v_bfe_i32 v16, v6, 0, 16
-; GCN-HSA-NEXT: flat_store_dwordx4 v[2:3], v[8:11]
-; GCN-HSA-NEXT: v_bfe_i32 v6, v5, 0, 16
-; GCN-HSA-NEXT: v_ashrrev_i32_e32 v9, 31, v5
-; GCN-HSA-NEXT: v_ashrrev_i32_e32 v8, 16, v5
-; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
+; GCN-HSA-NEXT: v_ashrrev_i32_e32 v23, 31, v1
+; GCN-HSA-NEXT: v_ashrrev_i32_e32 v22, 16, v1
+; GCN-HSA-NEXT: v_bfe_i32 v20, v1, 0, 16
; GCN-HSA-NEXT: v_bfe_i32 v24, v0, 0, 16
-; GCN-HSA-NEXT: v_bfe_i32 v26, v26, 0, 16
-; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
-; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x90
+; GCN-HSA-NEXT: v_bfe_i32 v26, v27, 0, 16
+; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
+; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
; GCN-HSA-NEXT: v_ashrrev_i32_e32 v25, 31, v24
-; GCN-HSA-NEXT: v_bfe_i32 v14, v27, 0, 16
; GCN-HSA-NEXT: v_ashrrev_i32_e32 v27, 31, v26
+; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
+; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x90
+; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[24:27]
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
-; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[24:27]
-; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
-; GCN-HSA-NEXT: v_bfe_i32 v20, v1, 0, 16
-; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
-; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x60
-; GCN-HSA-NEXT: v_ashrrev_i32_e32 v23, 31, v1
-; GCN-HSA-NEXT: v_ashrrev_i32_e32 v22, 16, v1
+; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
+; GCN-HSA-NEXT: v_lshrrev_b32_e32 v18, 16, v6
; GCN-HSA-NEXT: v_ashrrev_i32_e32 v21, 31, v20
-; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
-; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[20:23]
-; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
-; GCN-HSA-NEXT: v_mov_b32_e32 v19, v7
+; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
+; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x60
+; GCN-HSA-NEXT: v_lshrrev_b32_e32 v19, 16, v4
+; GCN-HSA-NEXT: v_bfe_i32 v16, v6, 0, 16
+; GCN-HSA-NEXT: v_ashrrev_i32_e32 v11, 31, v10
; GCN-HSA-NEXT: v_bfe_i32 v18, v18, 0, 16
-; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
-; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x70
+; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[20:23]
+; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
+; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
; GCN-HSA-NEXT: v_ashrrev_i32_e32 v17, 31, v16
-; GCN-HSA-NEXT: v_bfe_i32 v0, v19, 0, 16
+; GCN-HSA-NEXT: flat_store_dwordx4 v[2:3], v[10:13]
+; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
+; GCN-HSA-NEXT: v_bfe_i32 v10, v19, 0, 16
; GCN-HSA-NEXT: v_ashrrev_i32_e32 v19, 31, v18
+; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x70
+; GCN-HSA-NEXT: v_bfe_i32 v2, v7, 0, 16
+; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[16:19]
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
-; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[16:19]
-; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3
-; GCN-HSA-NEXT: v_ashrrev_i32_e32 v3, 31, v7
-; GCN-HSA-NEXT: v_ashrrev_i32_e32 v2, 16, v7
-; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 31, v0
-; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2
+; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
+; GCN-HSA-NEXT: v_bfe_i32 v8, v4, 0, 16
+; GCN-HSA-NEXT: v_ashrrev_i32_e32 v15, 31, v5
+; GCN-HSA-NEXT: v_ashrrev_i32_e32 v14, 16, v5
+; GCN-HSA-NEXT: v_bfe_i32 v12, v5, 0, 16
+; GCN-HSA-NEXT: v_ashrrev_i32_e32 v5, 31, v7
+; GCN-HSA-NEXT: v_ashrrev_i32_e32 v4, 16, v7
+; GCN-HSA-NEXT: v_ashrrev_i32_e32 v3, 31, v2
+; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
; GCN-HSA-NEXT: s_add_u32 s2, s0, 64
-; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
+; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[2:5]
; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
-; GCN-HSA-NEXT: v_ashrrev_i32_e32 v13, 31, v12
-; GCN-HSA-NEXT: v_ashrrev_i32_e32 v15, 31, v14
+; GCN-HSA-NEXT: v_ashrrev_i32_e32 v9, 31, v8
+; GCN-HSA-NEXT: v_ashrrev_i32_e32 v11, 31, v10
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
; GCN-HSA-NEXT: s_add_u32 s0, s0, 0x50
-; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[12:15]
+; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[8:11]
; GCN-HSA-NEXT: s_addc_u32 s1, s1, 0
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0
-; GCN-HSA-NEXT: v_ashrrev_i32_e32 v7, 31, v6
+; GCN-HSA-NEXT: v_ashrrev_i32_e32 v13, 31, v12
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1
-; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[6:9]
+; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[12:15]
; GCN-HSA-NEXT: s_endpgm
;
; GCN-NOHSA-VI-LABEL: global_sextload_v32i16_to_v32i64:
@@ -8413,115 +8396,110 @@ define amdgpu_kernel void @global_sextload_v32i16_to_v32i64(ptr addrspace(1) %ou
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s6
; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7
-; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[5:8], off, s[8:11], 0
-; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[13:16], off, s[8:11], 0 offset:48
-; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[9:12], off, s[8:11], 0 offset:32
-; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[1:4], off, s[8:11], 0 offset:16
+; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[7:10], off, s[8:11], 0
+; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[15:18], off, s[8:11], 0 offset:48
+; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[11:14], off, s[8:11], 0 offset:32
+; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[3:6], off, s[8:11], 0 offset:16
; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4
; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5
; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(3)
-; GCN-NOHSA-VI-NEXT: v_bfe_i32 v0, v6, 0, 16
+; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v1, 16, v8
; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(2)
-; GCN-NOHSA-VI-NEXT: v_bfe_i32 v18, v15, 0, 16
-; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v15, 16, v15
-; GCN-NOHSA-VI-NEXT: v_bfe_i32 v20, v15, 0, 16
-; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v17, 16, v6
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v6, v16
-; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v16, 16, v16
-; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v19, 31, v18
-; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v21, 31, v20
-; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[18:21], off, s[0:3], 0 offset:224
-; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v15, 16, v14
-; GCN-NOHSA-VI-NEXT: v_bfe_i32 v18, v6, 0, 16
-; GCN-NOHSA-VI-NEXT: v_bfe_i32 v20, v16, 0, 16
-; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v19, 31, v18
-; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v21, 31, v20
-; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[18:21], off, s[0:3], 0 offset:240
-; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v6, 16, v13
-; GCN-NOHSA-VI-NEXT: v_bfe_i32 v18, v13, 0, 16
+; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v2, 16, v17
+; GCN-NOHSA-VI-NEXT: v_bfe_i32 v19, v17, 0, 16
+; GCN-NOHSA-VI-NEXT: v_bfe_i32 v21, v2, 0, 16
+; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v20, 31, v19
+; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v22, 31, v21
+; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v2, 16, v18
+; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[19:22], off, s[0:3], 0 offset:224
+; GCN-NOHSA-VI-NEXT: v_bfe_i32 v17, v18, 0, 16
+; GCN-NOHSA-VI-NEXT: v_bfe_i32 v19, v2, 0, 16
+; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v18, 31, v17
+; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v20, 31, v19
+; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v2, 16, v15
+; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[17:20], off, s[0:3], 0 offset:240
+; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(2)
+; GCN-NOHSA-VI-NEXT: v_bfe_i32 v22, v4, 0, 16
+; GCN-NOHSA-VI-NEXT: v_bfe_i32 v17, v15, 0, 16
+; GCN-NOHSA-VI-NEXT: v_bfe_i32 v19, v2, 0, 16
+; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v18, 31, v17
+; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v20, 31, v19
+; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v2, 16, v16
+; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[17:20], off, s[0:3], 0 offset:192
+; GCN-NOHSA-VI-NEXT: v_bfe_i32 v15, v16, 0, 16
+; GCN-NOHSA-VI-NEXT: v_bfe_i32 v17, v2, 0, 16
+; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v16, 31, v15
+; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v18, 31, v17
+; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v2, 16, v13
+; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[15:18], off, s[0:3], 0 offset:208
+; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v20, 16, v9
+; GCN-NOHSA-VI-NEXT: v_bfe_i32 v15, v13, 0, 16
+; GCN-NOHSA-VI-NEXT: v_bfe_i32 v17, v2, 0, 16
+; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v16, 31, v15
+; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v18, 31, v17
+; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v2, 16, v14
+; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[15:18], off, s[0:3], 0 offset:160
; GCN-NOHSA-VI-NEXT: v_bfe_i32 v13, v14, 0, 16
-; GCN-NOHSA-VI-NEXT: v_bfe_i32 v15, v15, 0, 16
+; GCN-NOHSA-VI-NEXT: v_bfe_i32 v15, v2, 0, 16
; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v14, 31, v13
; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v16, 31, v15
-; GCN-NOHSA-VI-NEXT: v_bfe_i32 v20, v6, 0, 16
-; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[13:16], off, s[0:3], 0 offset:208
-; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v19, 31, v18
-; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(4)
-; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v13, 16, v11
-; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v21, 31, v20
-; GCN-NOHSA-VI-NEXT: v_bfe_i32 v15, v13, 0, 16
+; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v2, 16, v11
+; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[13:16], off, s[0:3], 0 offset:176
+; GCN-NOHSA-VI-NEXT: v_bfe_i32 v18, v9, 0, 16
; GCN-NOHSA-VI-NEXT: v_bfe_i32 v13, v11, 0, 16
-; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[18:21], off, s[0:3], 0 offset:192
+; GCN-NOHSA-VI-NEXT: v_bfe_i32 v15, v2, 0, 16
; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v14, 31, v13
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v19, v12
; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v16, 31, v15
-; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v12, 16, v12
-; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[13:16], off, s[0:3], 0 offset:160
-; GCN-NOHSA-VI-NEXT: v_bfe_i32 v11, v19, 0, 16
-; GCN-NOHSA-VI-NEXT: v_bfe_i32 v13, v12, 0, 16
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v18, v8
-; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v16, 16, v8
-; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v12, 31, v11
-; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v14, 31, v13
-; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v8, 16, v9
-; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[11:14], off, s[0:3], 0 offset:176
-; GCN-NOHSA-VI-NEXT: v_bfe_i32 v6, v5, 0, 16
-; GCN-NOHSA-VI-NEXT: v_bfe_i32 v13, v8, 0, 16
-; GCN-NOHSA-VI-NEXT: v_bfe_i32 v11, v9, 0, 16
-; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v12, 31, v11
-; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v14, 31, v13
-; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v9, 16, v10
-; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[11:14], off, s[0:3], 0 offset:128
-; GCN-NOHSA-VI-NEXT: v_bfe_i32 v9, v9, 0, 16
-; GCN-NOHSA-VI-NEXT: v_bfe_i32 v11, v7, 0, 16
-; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v12, 16, v7
-; GCN-NOHSA-VI-NEXT: v_bfe_i32 v7, v10, 0, 16
-; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v8, 31, v7
-; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v10, 31, v9
-; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[7:10], off, s[0:3], 0 offset:144
-; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v5, 16, v5
-; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(8)
-; GCN-NOHSA-VI-NEXT: v_bfe_i32 v7, v3, 0, 16
-; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v3, 16, v3
-; GCN-NOHSA-VI-NEXT: v_bfe_i32 v9, v3, 0, 16
-; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v8, 31, v7
+; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v2, 16, v12
+; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[13:16], off, s[0:3], 0 offset:128
+; GCN-NOHSA-VI-NEXT: v_bfe_i32 v9, v5, 0, 16
+; GCN-NOHSA-VI-NEXT: v_bfe_i32 v16, v2, 0, 16
+; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v2, 16, v5
+; GCN-NOHSA-VI-NEXT: v_bfe_i32 v11, v2, 0, 16
+; GCN-NOHSA-VI-NEXT: v_bfe_i32 v2, v1, 0, 16
+; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v1, 16, v4
+; GCN-NOHSA-VI-NEXT: v_bfe_i32 v14, v12, 0, 16
+; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v4, 16, v3
+; GCN-NOHSA-VI-NEXT: v_bfe_i32 v24, v1, 0, 16
+; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v1, 16, v6
+; GCN-NOHSA-VI-NEXT: v_bfe_i32 v0, v8, 0, 16
+; GCN-NOHSA-VI-NEXT: v_bfe_i32 v8, v7, 0, 16
+; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v7, 16, v7
+; GCN-NOHSA-VI-NEXT: v_bfe_i32 v13, v10, 0, 16
+; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v19, 16, v10
+; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v15, 31, v14
+; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v17, 31, v16
; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v10, 31, v9
-; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[7:10], off, s[0:3], 0 offset:96
-; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v3, 16, v2
-; GCN-NOHSA-VI-NEXT: v_bfe_i32 v8, v5, 0, 16
-; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, v4
-; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v4, 16, v4
-; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v7, 16, v1
-; GCN-NOHSA-VI-NEXT: v_bfe_i32 v27, v5, 0, 16
-; GCN-NOHSA-VI-NEXT: v_bfe_i32 v29, v4, 0, 16
-; GCN-NOHSA-VI-NEXT: v_bfe_i32 v15, v2, 0, 16
-; GCN-NOHSA-VI-NEXT: v_bfe_i32 v2, v17, 0, 16
-; GCN-NOHSA-VI-NEXT: v_bfe_i32 v19, v18, 0, 16
-; GCN-NOHSA-VI-NEXT: v_bfe_i32 v21, v16, 0, 16
-; GCN-NOHSA-VI-NEXT: v_bfe_i32 v13, v12, 0, 16
-; GCN-NOHSA-VI-NEXT: v_bfe_i32 v23, v1, 0, 16
-; GCN-NOHSA-VI-NEXT: v_bfe_i32 v17, v3, 0, 16
-; GCN-NOHSA-VI-NEXT: v_bfe_i32 v25, v7, 0, 16
-; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v28, 31, v27
-; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v30, 31, v29
+; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v12, 31, v11
+; GCN-NOHSA-VI-NEXT: v_bfe_i32 v28, v4, 0, 16
+; GCN-NOHSA-VI-NEXT: v_bfe_i32 v4, v6, 0, 16
+; GCN-NOHSA-VI-NEXT: v_bfe_i32 v6, v1, 0, 16
+; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[14:17], off, s[0:3], 0 offset:144
+; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[9:12], off, s[0:3], 0 offset:96
+; GCN-NOHSA-VI-NEXT: v_bfe_i32 v15, v19, 0, 16
+; GCN-NOHSA-VI-NEXT: v_bfe_i32 v10, v7, 0, 16
+; GCN-NOHSA-VI-NEXT: v_bfe_i32 v20, v20, 0, 16
+; GCN-NOHSA-VI-NEXT: v_bfe_i32 v26, v3, 0, 16
+; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v5, 31, v4
+; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v7, 31, v6
; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v3, 31, v2
-; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v7, 31, v6
; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v9, 31, v8
-; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v20, 31, v19
-; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v22, 31, v21
-; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v12, 31, v11
+; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v11, 31, v10
; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v14, 31, v13
; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v16, 31, v15
-; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v18, 31, v17
-; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v24, 31, v23
-; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v26, 31, v25
-; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[27:30], off, s[0:3], 0 offset:112
-; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[23:26], off, s[0:3], 0 offset:64
-; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[15:18], off, s[0:3], 0 offset:80
-; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[11:14], off, s[0:3], 0 offset:32
-; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[19:22], off, s[0:3], 0 offset:48
-; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[6:9], off, s[0:3], 0
+; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v19, 31, v18
+; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v21, 31, v20
+; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v23, 31, v22
+; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v25, 31, v24
+; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v27, 31, v26
+; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v29, 31, v28
+; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:112
+; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[26:29], off, s[0:3], 0 offset:64
+; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[22:25], off, s[0:3], 0 offset:80
+; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[18:21], off, s[0:3], 0 offset:32
+; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[13:16], off, s[0:3], 0 offset:48
+; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
; GCN-NOHSA-VI-NEXT: s_endpgm
;
diff --git a/llvm/test/CodeGen/AMDGPU/load-local-i16.ll b/llvm/test/CodeGen/AMDGPU/load-local-i16.ll
index b4c0b7497b95f..04d906ca6ad9c 100644
--- a/llvm/test/CodeGen/AMDGPU/load-local-i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-local-i16.ll
@@ -6137,9 +6137,8 @@ define amdgpu_kernel void @local_sextload_v8i16_to_v8i64(ptr addrspace(3) %out,
; SI-NEXT: ds_read2_b64 v[0:3], v0 offset1:1
; SI-NEXT: v_mov_b32_e32 v16, s0
; SI-NEXT: s_waitcnt lgkmcnt(0)
-; SI-NEXT: v_mov_b32_e32 v9, v3
; SI-NEXT: v_lshrrev_b32_e32 v11, 16, v2
-; SI-NEXT: v_lshrrev_b32_e32 v12, 16, v0
+; SI-NEXT: v_lshrrev_b32_e32 v9, 16, v0
; SI-NEXT: v_ashrrev_i32_e32 v5, 31, v1
; SI-NEXT: v_ashrrev_i32_e32 v4, 16, v1
; SI-NEXT: v_ashrrev_i32_e32 v7, 31, v3
@@ -6147,9 +6146,9 @@ define amdgpu_kernel void @local_sextload_v8i16_to_v8i64(ptr addrspace(3) %out,
; SI-NEXT: v_bfe_i32 v0, v0, 0, 16
; SI-NEXT: v_bfe_i32 v8, v1, 0, 16
; SI-NEXT: v_bfe_i32 v2, v2, 0, 16
-; SI-NEXT: v_bfe_i32 v10, v9, 0, 16
+; SI-NEXT: v_bfe_i32 v10, v3, 0, 16
; SI-NEXT: v_ashrrev_i32_e32 v1, 31, v0
-; SI-NEXT: v_bfe_i32 v12, v12, 0, 16
+; SI-NEXT: v_bfe_i32 v12, v9, 0, 16
; SI-NEXT: v_ashrrev_i32_e32 v9, 31, v8
; SI-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; SI-NEXT: v_bfe_i32 v14, v11, 0, 16
@@ -6171,14 +6170,13 @@ define amdgpu_kernel void @local_sextload_v8i16_to_v8i64(ptr addrspace(3) %out,
; VI-NO-DS128-NEXT: ds_read2_b64 v[0:3], v0 offset1:1
; VI-NO-DS128-NEXT: v_mov_b32_e32 v16, s0
; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v7, 16, v2
; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v10, 16, v3
-; VI-NO-DS128-NEXT: v_bfe_i32 v14, v2, 0, 16
-; VI-NO-DS128-NEXT: v_mov_b32_e32 v2, v3
; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v4, 16, v0
; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v5, 16, v1
+; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v7, 16, v2
; VI-NO-DS128-NEXT: v_bfe_i32 v10, v10, 0, 16
-; VI-NO-DS128-NEXT: v_bfe_i32 v2, v2, 0, 16
+; VI-NO-DS128-NEXT: v_bfe_i32 v14, v2, 0, 16
+; VI-NO-DS128-NEXT: v_bfe_i32 v2, v3, 0, 16
; VI-NO-DS128-NEXT: v_bfe_i32 v4, v4, 0, 16
; VI-NO-DS128-NEXT: v_bfe_i32 v6, v5, 0, 16
; VI-NO-DS128-NEXT: v_bfe_i32 v8, v7, 0, 16
@@ -6206,14 +6204,13 @@ define amdgpu_kernel void @local_sextload_v8i16_to_v8i64(ptr addrspace(3) %out,
; GFX9-NO-DS128-NEXT: ds_read2_b64 v[0:3], v0 offset1:1
; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v16, s0
; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v7, 16, v2
; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v9, 16, v3
-; GFX9-NO-DS128-NEXT: v_bfe_i32 v14, v2, 0, 16
-; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v2, v3
; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v4, 16, v0
; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v5, 16, v1
+; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v7, 16, v2
; GFX9-NO-DS128-NEXT: v_bfe_i32 v10, v9, 0, 16
-; GFX9-NO-DS128-NEXT: v_bfe_i32 v2, v2, 0, 16
+; GFX9-NO-DS128-NEXT: v_bfe_i32 v14, v2, 0, 16
+; GFX9-NO-DS128-NEXT: v_bfe_i32 v2, v3, 0, 16
; GFX9-NO-DS128-NEXT: v_bfe_i32 v4, v4, 0, 16
; GFX9-NO-DS128-NEXT: v_bfe_i32 v6, v5, 0, 16
; GFX9-NO-DS128-NEXT: v_bfe_i32 v8, v7, 0, 16
@@ -6331,14 +6328,13 @@ define amdgpu_kernel void @local_sextload_v8i16_to_v8i64(ptr addrspace(3) %out,
; VI-DS128-NEXT: v_bfe_i32 v4, v0, 0, 16
; VI-DS128-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; VI-DS128-NEXT: v_bfe_i32 v6, v0, 0, 16
-; VI-DS128-NEXT: v_lshrrev_b32_e32 v0, 16, v2
; VI-DS128-NEXT: v_bfe_i32 v12, v2, 0, 16
-; VI-DS128-NEXT: v_bfe_i32 v14, v0, 0, 16
-; VI-DS128-NEXT: v_mov_b32_e32 v0, v3
+; VI-DS128-NEXT: v_lshrrev_b32_e32 v0, 16, v2
; VI-DS128-NEXT: v_lshrrev_b32_e32 v2, 16, v3
; VI-DS128-NEXT: v_bfe_i32 v8, v1, 0, 16
; VI-DS128-NEXT: v_lshrrev_b32_e32 v1, 16, v1
-; VI-DS128-NEXT: v_bfe_i32 v0, v0, 0, 16
+; VI-DS128-NEXT: v_bfe_i32 v14, v0, 0, 16
+; VI-DS128-NEXT: v_bfe_i32 v0, v3, 0, 16
; VI-DS128-NEXT: v_bfe_i32 v2, v2, 0, 16
; VI-DS128-NEXT: v_bfe_i32 v10, v1, 0, 16
; VI-DS128-NEXT: v_ashrrev_i32_e32 v1, 31, v0
@@ -6365,15 +6361,14 @@ define amdgpu_kernel void @local_sextload_v8i16_to_v8i64(ptr addrspace(3) %out,
; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-DS128-NEXT: v_bfe_i32 v4, v0, 0, 16
; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v0, 16, v0
+; GFX9-DS128-NEXT: v_bfe_i32 v12, v2, 0, 16
; GFX9-DS128-NEXT: v_bfe_i32 v6, v0, 0, 16
; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v0, 16, v2
-; GFX9-DS128-NEXT: v_bfe_i32 v12, v2, 0, 16
-; GFX9-DS128-NEXT: v_bfe_i32 v14, v0, 0, 16
-; GFX9-DS128-NEXT: v_mov_b32_e32 v0, v3
; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v2, 16, v3
; GFX9-DS128-NEXT: v_bfe_i32 v8, v1, 0, 16
; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v1, 16, v1
-; GFX9-DS128-NEXT: v_bfe_i32 v0, v0, 0, 16
+; GFX9-DS128-NEXT: v_bfe_i32 v14, v0, 0, 16
+; GFX9-DS128-NEXT: v_bfe_i32 v0, v3, 0, 16
; GFX9-DS128-NEXT: v_bfe_i32 v2, v2, 0, 16
; GFX9-DS128-NEXT: v_bfe_i32 v10, v1, 0, 16
; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v1, 31, v0
@@ -6813,18 +6808,16 @@ define amdgpu_kernel void @local_sextload_v16i16_to_v16i64(ptr addrspace(3) %out
; SI-NEXT: ds_read2_b64 v[4:7], v4 offset1:1
; SI-NEXT: v_mov_b32_e32 v18, s0
; SI-NEXT: s_waitcnt lgkmcnt(1)
-; SI-NEXT: v_mov_b32_e32 v12, v3
; SI-NEXT: v_lshrrev_b32_e32 v15, 16, v2
; SI-NEXT: v_lshrrev_b32_e32 v16, 16, v0
; SI-NEXT: s_waitcnt lgkmcnt(0)
-; SI-NEXT: v_mov_b32_e32 v14, v7
-; SI-NEXT: v_lshrrev_b32_e32 v17, 16, v6
-; SI-NEXT: v_lshrrev_b32_e32 v19, 16, v4
+; SI-NEXT: v_lshrrev_b32_e32 v14, 16, v6
+; SI-NEXT: v_lshrrev_b32_e32 v17, 16, v4
; SI-NEXT: v_ashrrev_i32_e32 v9, 31, v5
; SI-NEXT: v_ashrrev_i32_e32 v8, 16, v5
; SI-NEXT: v_ashrrev_i32_e32 v11, 31, v3
; SI-NEXT: v_ashrrev_i32_e32 v10, 16, v3
-; SI-NEXT: v_bfe_i32 v12, v12, 0, 16
+; SI-NEXT: v_bfe_i32 v12, v3, 0, 16
; SI-NEXT: v_ashrrev_i32_e32 v13, 31, v12
; SI-NEXT: ds_write2_b64 v18, v[12:13], v[10:11] offset0:14 offset1:15
; SI-NEXT: v_ashrrev_i32_e32 v11, 31, v1
@@ -6834,7 +6827,7 @@ define amdgpu_kernel void @local_sextload_v16i16_to_v16i64(ptr addrspace(3) %out
; SI-NEXT: ds_write2_b64 v18, v[12:13], v[10:11] offset0:10 offset1:11
; SI-NEXT: v_ashrrev_i32_e32 v11, 31, v7
; SI-NEXT: v_ashrrev_i32_e32 v10, 16, v7
-; SI-NEXT: v_bfe_i32 v12, v14, 0, 16
+; SI-NEXT: v_bfe_i32 v12, v7, 0, 16
; SI-NEXT: v_ashrrev_i32_e32 v13, 31, v12
; SI-NEXT: ds_write2_b64 v18, v[12:13], v[10:11] offset0:6 offset1:7
; SI-NEXT: v_bfe_i32 v1, v4, 0, 16
@@ -6842,10 +6835,10 @@ define amdgpu_kernel void @local_sextload_v16i16_to_v16i64(ptr addrspace(3) %out
; SI-NEXT: v_bfe_i32 v5, v6, 0, 16
; SI-NEXT: v_bfe_i32 v10, v0, 0, 16
; SI-NEXT: v_bfe_i32 v7, v2, 0, 16
-; SI-NEXT: v_bfe_i32 v12, v19, 0, 16
+; SI-NEXT: v_bfe_i32 v12, v17, 0, 16
; SI-NEXT: v_ashrrev_i32_e32 v2, 31, v1
; SI-NEXT: v_ashrrev_i32_e32 v4, 31, v3
-; SI-NEXT: v_bfe_i32 v14, v17, 0, 16
+; SI-NEXT: v_bfe_i32 v14, v14, 0, 16
; SI-NEXT: v_ashrrev_i32_e32 v6, 31, v5
; SI-NEXT: v_bfe_i32 v16, v16, 0, 16
; SI-NEXT: v_ashrrev_i32_e32 v11, 31, v10
@@ -6891,38 +6884,36 @@ define amdgpu_kernel void @local_sextload_v16i16_to_v16i64(ptr addrspace(3) %out
; VI-NO-DS128-NEXT: v_bfe_i32 v14, v6, 0, 16
; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v5, 31, v4
; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v15, 31, v14
+; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v6, 16, v7
; VI-NO-DS128-NEXT: ds_write2_b64 v19, v[14:15], v[4:5] offset0:12 offset1:13
-; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v14, 16, v7
-; VI-NO-DS128-NEXT: v_mov_b32_e32 v16, v7
-; VI-NO-DS128-NEXT: v_bfe_i32 v14, v14, 0, 16
-; VI-NO-DS128-NEXT: v_bfe_i32 v16, v16, 0, 16
-; VI-NO-DS128-NEXT: v_bfe_i32 v4, v18, 0, 16
-; VI-NO-DS128-NEXT: v_mov_b32_e32 v18, v3
+; VI-NO-DS128-NEXT: v_bfe_i32 v14, v6, 0, 16
+; VI-NO-DS128-NEXT: v_bfe_i32 v6, v7, 0, 16
; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v15, 31, v14
-; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v17, 31, v16
+; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v7, 31, v6
; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v8, 16, v0
; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v9, 16, v1
; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v11, 16, v2
-; VI-NO-DS128-NEXT: ds_write2_b64 v19, v[16:17], v[14:15] offset0:14 offset1:15
-; VI-NO-DS128-NEXT: v_bfe_i32 v14, v18, 0, 16
+; VI-NO-DS128-NEXT: v_bfe_i32 v4, v18, 0, 16
+; VI-NO-DS128-NEXT: ds_write2_b64 v19, v[6:7], v[14:15] offset0:14 offset1:15
+; VI-NO-DS128-NEXT: v_bfe_i32 v6, v2, 0, 16
+; VI-NO-DS128-NEXT: v_bfe_i32 v2, v3, 0, 16
; VI-NO-DS128-NEXT: v_bfe_i32 v8, v8, 0, 16
; VI-NO-DS128-NEXT: v_bfe_i32 v10, v9, 0, 16
; VI-NO-DS128-NEXT: v_bfe_i32 v12, v11, 0, 16
; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v5, 31, v4
-; VI-NO-DS128-NEXT: v_bfe_i32 v0, v0, 0, 16
-; VI-NO-DS128-NEXT: v_bfe_i32 v6, v1, 0, 16
-; VI-NO-DS128-NEXT: v_bfe_i32 v2, v2, 0, 16
-; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v15, 31, v14
+; VI-NO-DS128-NEXT: v_bfe_i32 v16, v0, 0, 16
+; VI-NO-DS128-NEXT: v_bfe_i32 v0, v1, 0, 16
+; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v9, 31, v8
; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v11, 31, v10
; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v13, 31, v12
+; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v17, 31, v16
; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v7, 31, v6
-; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v3, 31, v2
-; VI-NO-DS128-NEXT: ds_write2_b64 v19, v[14:15], v[4:5] offset0:6 offset1:7
-; VI-NO-DS128-NEXT: ds_write2_b64 v19, v[2:3], v[12:13] offset0:4 offset1:5
-; VI-NO-DS128-NEXT: ds_write2_b64 v19, v[6:7], v[10:11] offset0:2 offset1:3
-; VI-NO-DS128-NEXT: ds_write2_b64 v19, v[0:1], v[8:9] offset1:1
+; VI-NO-DS128-NEXT: ds_write2_b64 v19, v[2:3], v[4:5] offset0:6 offset1:7
+; VI-NO-DS128-NEXT: ds_write2_b64 v19, v[6:7], v[12:13] offset0:4 offset1:5
+; VI-NO-DS128-NEXT: ds_write2_b64 v19, v[0:1], v[10:11] offset0:2 offset1:3
+; VI-NO-DS128-NEXT: ds_write2_b64 v19, v[16:17], v[8:9] offset1:1
; VI-NO-DS128-NEXT: s_endpgm
;
; GFX9-NO-DS128-LABEL: local_sextload_v16i16_to_v16i64:
@@ -6953,38 +6944,36 @@ define amdgpu_kernel void @local_sextload_v16i16_to_v16i64(ptr addrspace(3) %out
; GFX9-NO-DS128-NEXT: v_bfe_i32 v14, v6, 0, 16
; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v5, 31, v4
; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v15, 31, v14
+; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v6, 16, v7
; GFX9-NO-DS128-NEXT: ds_write2_b64 v19, v[14:15], v[4:5] offset0:12 offset1:13
-; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v14, 16, v7
-; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v16, v7
-; GFX9-NO-DS128-NEXT: v_bfe_i32 v14, v14, 0, 16
-; GFX9-NO-DS128-NEXT: v_bfe_i32 v16, v16, 0, 16
-; GFX9-NO-DS128-NEXT: v_bfe_i32 v4, v18, 0, 16
-; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v18, v3
+; GFX9-NO-DS128-NEXT: v_bfe_i32 v14, v6, 0, 16
+; GFX9-NO-DS128-NEXT: v_bfe_i32 v6, v7, 0, 16
; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v15, 31, v14
-; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v17, 31, v16
+; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v7, 31, v6
; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v8, 16, v0
; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v9, 16, v1
; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v11, 16, v2
-; GFX9-NO-DS128-NEXT: ds_write2_b64 v19, v[16:17], v[14:15] offset0:14 offset1:15
-; GFX9-NO-DS128-NEXT: v_bfe_i32 v14, v18, 0, 16
+; GFX9-NO-DS128-NEXT: v_bfe_i32 v4, v18, 0, 16
+; GFX9-NO-DS128-NEXT: ds_write2_b64 v19, v[6:7], v[14:15] offset0:14 offset1:15
+; GFX9-NO-DS128-NEXT: v_bfe_i32 v6, v2, 0, 16
+; GFX9-NO-DS128-NEXT: v_bfe_i32 v2, v3, 0, 16
; GFX9-NO-DS128-NEXT: v_bfe_i32 v8, v8, 0, 16
; GFX9-NO-DS128-NEXT: v_bfe_i32 v10, v9, 0, 16
; GFX9-NO-DS128-NEXT: v_bfe_i32 v12, v11, 0, 16
; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v5, 31, v4
-; GFX9-NO-DS128-NEXT: v_bfe_i32 v0, v0, 0, 16
-; GFX9-NO-DS128-NEXT: v_bfe_i32 v6, v1, 0, 16
-; GFX9-NO-DS128-NEXT: v_bfe_i32 v2, v2, 0, 16
-; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v15, 31, v14
+; GFX9-NO-DS128-NEXT: v_bfe_i32 v16, v0, 0, 16
+; GFX9-NO-DS128-NEXT: v_bfe_i32 v0, v1, 0, 16
+; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v9, 31, v8
; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v11, 31, v10
; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v13, 31, v12
+; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v17, 31, v16
; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v7, 31, v6
-; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v3, 31, v2
-; GFX9-NO-DS128-NEXT: ds_write2_b64 v19, v[14:15], v[4:5] offset0:6 offset1:7
-; GFX9-NO-DS128-NEXT: ds_write2_b64 v19, v[2:3], v[12:13] offset0:4 offset1:5
-; GFX9-NO-DS128-NEXT: ds_write2_b64 v19, v[6:7], v[10:11] offset0:2 offset1:3
-; GFX9-NO-DS128-NEXT: ds_write2_b64 v19, v[0:1], v[8:9] offset1:1
+; GFX9-NO-DS128-NEXT: ds_write2_b64 v19, v[2:3], v[4:5] offset0:6 offset1:7
+; GFX9-NO-DS128-NEXT: ds_write2_b64 v19, v[6:7], v[12:13] offset0:4 offset1:5
+; GFX9-NO-DS128-NEXT: ds_write2_b64 v19, v[0:1], v[10:11] offset0:2 offset1:3
+; GFX9-NO-DS128-NEXT: ds_write2_b64 v19, v[16:17], v[8:9] offset1:1
; GFX9-NO-DS128-NEXT: s_endpgm
;
; EG-LABEL: local_sextload_v16i16_to_v16i64:
@@ -7167,11 +7156,12 @@ define amdgpu_kernel void @local_sextload_v16i16_to_v16i64(ptr addrspace(3) %out
; VI-DS128-NEXT: ds_read_b128 v[3:6], v0
; VI-DS128-NEXT: ds_read_b128 v[7:10], v0 offset:16
; VI-DS128-NEXT: s_waitcnt lgkmcnt(1)
-; VI-DS128-NEXT: v_mov_b32_e32 v18, v6
+; VI-DS128-NEXT: v_bfe_i32 v0, v3, 0, 16
+; VI-DS128-NEXT: v_lshrrev_b32_e32 v2, 16, v3
; VI-DS128-NEXT: s_waitcnt lgkmcnt(0)
+; VI-DS128-NEXT: v_lshrrev_b32_e32 v3, 16, v8
; VI-DS128-NEXT: v_bfe_i32 v11, v8, 0, 16
-; VI-DS128-NEXT: v_lshrrev_b32_e32 v8, 16, v8
-; VI-DS128-NEXT: v_bfe_i32 v13, v8, 0, 16
+; VI-DS128-NEXT: v_bfe_i32 v13, v3, 0, 16
; VI-DS128-NEXT: v_ashrrev_i32_e32 v12, 31, v11
; VI-DS128-NEXT: v_ashrrev_i32_e32 v14, 31, v13
; VI-DS128-NEXT: v_mov_b32_e32 v8, s0
@@ -7179,44 +7169,41 @@ define amdgpu_kernel void @local_sextload_v16i16_to_v16i64(ptr addrspace(3) %out
; VI-DS128-NEXT: v_bfe_i32 v11, v7, 0, 16
; VI-DS128-NEXT: v_lshrrev_b32_e32 v7, 16, v7
; VI-DS128-NEXT: v_bfe_i32 v13, v7, 0, 16
-; VI-DS128-NEXT: v_mov_b32_e32 v15, v10
; VI-DS128-NEXT: v_ashrrev_i32_e32 v12, 31, v11
; VI-DS128-NEXT: v_ashrrev_i32_e32 v14, 31, v13
; VI-DS128-NEXT: v_lshrrev_b32_e32 v7, 16, v10
; VI-DS128-NEXT: ds_write_b128 v8, v[11:14] offset:64
-; VI-DS128-NEXT: v_bfe_i32 v11, v15, 0, 16
+; VI-DS128-NEXT: v_bfe_i32 v11, v10, 0, 16
; VI-DS128-NEXT: v_bfe_i32 v13, v7, 0, 16
; VI-DS128-NEXT: v_ashrrev_i32_e32 v12, 31, v11
; VI-DS128-NEXT: v_ashrrev_i32_e32 v14, 31, v13
-; VI-DS128-NEXT: v_lshrrev_b32_e32 v16, 16, v9
+; VI-DS128-NEXT: v_bfe_i32 v10, v4, 0, 16
+; VI-DS128-NEXT: v_lshrrev_b32_e32 v4, 16, v4
; VI-DS128-NEXT: ds_write_b128 v8, v[11:14] offset:112
+; VI-DS128-NEXT: v_bfe_i32 v12, v4, 0, 16
+; VI-DS128-NEXT: v_lshrrev_b32_e32 v4, 16, v9
; VI-DS128-NEXT: v_bfe_i32 v14, v9, 0, 16
-; VI-DS128-NEXT: v_bfe_i32 v16, v16, 0, 16
-; VI-DS128-NEXT: v_lshrrev_b32_e32 v19, 16, v6
+; VI-DS128-NEXT: v_bfe_i32 v16, v4, 0, 16
; VI-DS128-NEXT: v_ashrrev_i32_e32 v15, 31, v14
; VI-DS128-NEXT: v_ashrrev_i32_e32 v17, 31, v16
-; VI-DS128-NEXT: v_lshrrev_b32_e32 v2, 16, v3
-; VI-DS128-NEXT: v_bfe_i32 v10, v4, 0, 16
-; VI-DS128-NEXT: v_lshrrev_b32_e32 v4, 16, v4
-; VI-DS128-NEXT: v_lshrrev_b32_e32 v7, 16, v5
+; VI-DS128-NEXT: v_lshrrev_b32_e32 v4, 16, v5
+; VI-DS128-NEXT: v_lshrrev_b32_e32 v7, 16, v6
; VI-DS128-NEXT: ds_write_b128 v8, v[14:17] offset:96
-; VI-DS128-NEXT: v_bfe_i32 v14, v18, 0, 16
-; VI-DS128-NEXT: v_bfe_i32 v16, v19, 0, 16
-; VI-DS128-NEXT: v_bfe_i32 v0, v3, 0, 16
-; VI-DS128-NEXT: v_bfe_i32 v2, v2, 0, 16
-; VI-DS128-NEXT: v_bfe_i32 v12, v4, 0, 16
-; VI-DS128-NEXT: v_bfe_i32 v4, v5, 0, 16
+; VI-DS128-NEXT: v_bfe_i32 v16, v4, 0, 16
+; VI-DS128-NEXT: v_bfe_i32 v4, v6, 0, 16
; VI-DS128-NEXT: v_bfe_i32 v6, v7, 0, 16
-; VI-DS128-NEXT: v_ashrrev_i32_e32 v15, 31, v14
-; VI-DS128-NEXT: v_ashrrev_i32_e32 v17, 31, v16
+; VI-DS128-NEXT: v_bfe_i32 v2, v2, 0, 16
+; VI-DS128-NEXT: v_bfe_i32 v14, v5, 0, 16
+; VI-DS128-NEXT: v_ashrrev_i32_e32 v5, 31, v4
+; VI-DS128-NEXT: v_ashrrev_i32_e32 v7, 31, v6
; VI-DS128-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; VI-DS128-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; VI-DS128-NEXT: v_ashrrev_i32_e32 v11, 31, v10
; VI-DS128-NEXT: v_ashrrev_i32_e32 v13, 31, v12
-; VI-DS128-NEXT: v_ashrrev_i32_e32 v5, 31, v4
-; VI-DS128-NEXT: v_ashrrev_i32_e32 v7, 31, v6
-; VI-DS128-NEXT: ds_write_b128 v8, v[14:17] offset:48
-; VI-DS128-NEXT: ds_write_b128 v8, v[4:7] offset:32
+; VI-DS128-NEXT: v_ashrrev_i32_e32 v15, 31, v14
+; VI-DS128-NEXT: v_ashrrev_i32_e32 v17, 31, v16
+; VI-DS128-NEXT: ds_write_b128 v8, v[4:7] offset:48
+; VI-DS128-NEXT: ds_write_b128 v8, v[14:17] offset:32
; VI-DS128-NEXT: ds_write_b128 v8, v[10:13] offset:16
; VI-DS128-NEXT: ds_write_b128 v8, v[0:3]
; VI-DS128-NEXT: s_endpgm
@@ -7242,43 +7229,41 @@ define amdgpu_kernel void @local_sextload_v16i16_to_v16i64(ptr addrspace(3) %out
; GFX9-DS128-NEXT: v_bfe_i32 v11, v7, 0, 16
; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v7, 16, v7
; GFX9-DS128-NEXT: v_bfe_i32 v13, v7, 0, 16
-; GFX9-DS128-NEXT: v_mov_b32_e32 v15, v10
; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v12, 31, v11
; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v14, 31, v13
; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v7, 16, v10
; GFX9-DS128-NEXT: ds_write_b128 v8, v[11:14] offset:64
-; GFX9-DS128-NEXT: v_bfe_i32 v11, v15, 0, 16
+; GFX9-DS128-NEXT: v_bfe_i32 v11, v10, 0, 16
; GFX9-DS128-NEXT: v_bfe_i32 v13, v7, 0, 16
; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v12, 31, v11
; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v14, 31, v13
-; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v16, 16, v9
+; GFX9-DS128-NEXT: v_bfe_i32 v10, v4, 0, 16
+; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v4, 16, v4
; GFX9-DS128-NEXT: ds_write_b128 v8, v[11:14] offset:112
+; GFX9-DS128-NEXT: v_bfe_i32 v12, v4, 0, 16
+; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v4, 16, v9
; GFX9-DS128-NEXT: v_bfe_i32 v14, v9, 0, 16
-; GFX9-DS128-NEXT: v_bfe_i32 v16, v16, 0, 16
-; GFX9-DS128-NEXT: v_mov_b32_e32 v18, v6
-; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v19, 16, v6
+; GFX9-DS128-NEXT: v_bfe_i32 v16, v4, 0, 16
; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v15, 31, v14
; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v17, 31, v16
-; GFX9-DS128-NEXT: v_bfe_i32 v10, v4, 0, 16
-; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v4, 16, v4
-; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v7, 16, v5
+; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v4, 16, v5
+; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v7, 16, v6
; GFX9-DS128-NEXT: ds_write_b128 v8, v[14:17] offset:96
-; GFX9-DS128-NEXT: v_bfe_i32 v14, v18, 0, 16
-; GFX9-DS128-NEXT: v_bfe_i32 v16, v19, 0, 16
-; GFX9-DS128-NEXT: v_bfe_i32 v2, v2, 0, 16
-; GFX9-DS128-NEXT: v_bfe_i32 v12, v4, 0, 16
-; GFX9-DS128-NEXT: v_bfe_i32 v4, v5, 0, 16
+; GFX9-DS128-NEXT: v_bfe_i32 v16, v4, 0, 16
+; GFX9-DS128-NEXT: v_bfe_i32 v4, v6, 0, 16
; GFX9-DS128-NEXT: v_bfe_i32 v6, v7, 0, 16
-; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v15, 31, v14
-; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v17, 31, v16
+; GFX9-DS128-NEXT: v_bfe_i32 v2, v2, 0, 16
+; GFX9-DS128-NEXT: v_bfe_i32 v14, v5, 0, 16
+; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v5, 31, v4
+; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v7, 31, v6
; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v11, 31, v10
; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v13, 31, v12
-; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v5, 31, v4
-; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v7, 31, v6
-; GFX9-DS128-NEXT: ds_write_b128 v8, v[14:17] offset:48
-; GFX9-DS128-NEXT: ds_write_b128 v8, v[4:7] offset:32
+; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v15, 31, v14
+; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v17, 31, v16
+; GFX9-DS128-NEXT: ds_write_b128 v8, v[4:7] offset:48
+; GFX9-DS128-NEXT: ds_write_b128 v8, v[14:17] offset:32
; GFX9-DS128-NEXT: ds_write_b128 v8, v[10:13] offset:16
; GFX9-DS128-NEXT: ds_write_b128 v8, v[0:3]
; GFX9-DS128-NEXT: s_endpgm
@@ -8032,10 +8017,9 @@ define amdgpu_kernel void @local_sextload_v32i16_to_v32i64(ptr addrspace(3) %out
; SI-NEXT: ds_read2_b64 v[8:11], v12 offset0:6 offset1:7
; SI-NEXT: ds_read2_b64 v[12:15], v12 offset0:4 offset1:5
; SI-NEXT: s_waitcnt lgkmcnt(3)
-; SI-NEXT: v_mov_b32_e32 v18, v7
; SI-NEXT: v_ashrrev_i32_e32 v17, 31, v7
; SI-NEXT: v_ashrrev_i32_e32 v16, 16, v7
-; SI-NEXT: v_bfe_i32 v18, v18, 0, 16
+; SI-NEXT: v_bfe_i32 v18, v7, 0, 16
; SI-NEXT: v_ashrrev_i32_e32 v19, 31, v18
; SI-NEXT: v_mov_b32_e32 v7, s0
; SI-NEXT: ds_write2_b64 v7, v[18:19], v[16:17] offset0:14 offset1:15
@@ -8045,10 +8029,9 @@ define amdgpu_kernel void @local_sextload_v32i16_to_v32i64(ptr addrspace(3) %out
; SI-NEXT: v_ashrrev_i32_e32 v19, 31, v18
; SI-NEXT: ds_write2_b64 v7, v[18:19], v[16:17] offset0:10 offset1:11
; SI-NEXT: s_waitcnt lgkmcnt(4)
-; SI-NEXT: v_mov_b32_e32 v5, v3
; SI-NEXT: v_ashrrev_i32_e32 v17, 31, v3
; SI-NEXT: v_ashrrev_i32_e32 v16, 16, v3
-; SI-NEXT: v_bfe_i32 v18, v5, 0, 16
+; SI-NEXT: v_bfe_i32 v18, v3, 0, 16
; SI-NEXT: v_ashrrev_i32_e32 v19, 31, v18
; SI-NEXT: ds_write2_b64 v7, v[18:19], v[16:17] offset0:6 offset1:7
; SI-NEXT: v_ashrrev_i32_e32 v17, 31, v1
@@ -8057,10 +8040,9 @@ define amdgpu_kernel void @local_sextload_v32i16_to_v32i64(ptr addrspace(3) %out
; SI-NEXT: v_ashrrev_i32_e32 v19, 31, v18
; SI-NEXT: ds_write2_b64 v7, v[18:19], v[16:17] offset0:2 offset1:3
; SI-NEXT: s_waitcnt lgkmcnt(5)
-; SI-NEXT: v_mov_b32_e32 v1, v11
; SI-NEXT: v_ashrrev_i32_e32 v17, 31, v11
; SI-NEXT: v_ashrrev_i32_e32 v16, 16, v11
-; SI-NEXT: v_bfe_i32 v18, v1, 0, 16
+; SI-NEXT: v_bfe_i32 v18, v11, 0, 16
; SI-NEXT: v_ashrrev_i32_e32 v19, 31, v18
; SI-NEXT: ds_write2_b64 v7, v[18:19], v[16:17] offset0:30 offset1:31
; SI-NEXT: v_ashrrev_i32_e32 v17, 31, v9
@@ -8069,12 +8051,11 @@ define amdgpu_kernel void @local_sextload_v32i16_to_v32i64(ptr addrspace(3) %out
; SI-NEXT: v_ashrrev_i32_e32 v19, 31, v18
; SI-NEXT: ds_write2_b64 v7, v[18:19], v[16:17] offset0:26 offset1:27
; SI-NEXT: s_waitcnt lgkmcnt(6)
-; SI-NEXT: v_mov_b32_e32 v1, v15
-; SI-NEXT: v_ashrrev_i32_e32 v16, 31, v15
-; SI-NEXT: v_ashrrev_i32_e32 v15, 16, v15
-; SI-NEXT: v_bfe_i32 v17, v1, 0, 16
-; SI-NEXT: v_ashrrev_i32_e32 v18, 31, v17
-; SI-NEXT: ds_write2_b64 v7, v[17:18], v[15:16] offset0:22 offset1:23
+; SI-NEXT: v_ashrrev_i32_e32 v17, 31, v15
+; SI-NEXT: v_ashrrev_i32_e32 v16, 16, v15
+; SI-NEXT: v_bfe_i32 v18, v15, 0, 16
+; SI-NEXT: v_ashrrev_i32_e32 v19, 31, v18
+; SI-NEXT: ds_write2_b64 v7, v[18:19], v[16:17] offset0:22 offset1:23
; SI-NEXT: v_ashrrev_i32_e32 v16, 31, v13
; SI-NEXT: v_ashrrev_i32_e32 v15, 16, v13
; SI-NEXT: v_bfe_i32 v17, v13, 0, 16
@@ -8135,19 +8116,19 @@ define amdgpu_kernel void @local_sextload_v32i16_to_v32i64(ptr addrspace(3) %out
; VI-NO-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NO-DS128-NEXT: s_mov_b32 m0, -1
; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NO-DS128-NEXT: v_mov_b32_e32 v7, s1
-; VI-NO-DS128-NEXT: ds_read2_b64 v[0:3], v7 offset0:6 offset1:7
-; VI-NO-DS128-NEXT: ds_read2_b64 v[12:15], v7 offset0:4 offset1:5
+; VI-NO-DS128-NEXT: v_mov_b32_e32 v4, s1
+; VI-NO-DS128-NEXT: ds_read2_b64 v[0:3], v4 offset0:6 offset1:7
+; VI-NO-DS128-NEXT: ds_read2_b64 v[12:15], v4 offset0:4 offset1:5
; VI-NO-DS128-NEXT: v_mov_b32_e32 v11, s0
; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(1)
-; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v4, 16, v3
-; VI-NO-DS128-NEXT: v_bfe_i32 v16, v4, 0, 16
-; VI-NO-DS128-NEXT: v_bfe_i32 v18, v3, 0, 16
+; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v5, 16, v3
+; VI-NO-DS128-NEXT: v_bfe_i32 v16, v3, 0, 16
+; VI-NO-DS128-NEXT: v_bfe_i32 v18, v5, 0, 16
; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v17, 31, v16
; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v19, 31, v18
-; VI-NO-DS128-NEXT: ds_read2_b64 v[3:6], v7 offset0:2 offset1:3
-; VI-NO-DS128-NEXT: ds_read2_b64 v[7:10], v7 offset1:1
-; VI-NO-DS128-NEXT: ds_write2_b64 v11, v[18:19], v[16:17] offset0:30 offset1:31
+; VI-NO-DS128-NEXT: ds_read2_b64 v[7:10], v4 offset0:2 offset1:3
+; VI-NO-DS128-NEXT: ds_read2_b64 v[3:6], v4 offset1:1
+; VI-NO-DS128-NEXT: ds_write2_b64 v11, v[16:17], v[18:19] offset0:30 offset1:31
; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v16, 16, v2
; VI-NO-DS128-NEXT: v_bfe_i32 v16, v16, 0, 16
; VI-NO-DS128-NEXT: v_bfe_i32 v18, v2, 0, 16
@@ -8179,87 +8160,86 @@ define amdgpu_kernel void @local_sextload_v32i16_to_v32i64(ptr addrspace(3) %out
; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v16, 31, v15
; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v18, 31, v17
; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v14, 16, v13
+; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(5)
+; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v0, 16, v4
; VI-NO-DS128-NEXT: ds_write2_b64 v11, v[17:18], v[15:16] offset0:20 offset1:21
; VI-NO-DS128-NEXT: v_bfe_i32 v14, v14, 0, 16
; VI-NO-DS128-NEXT: v_bfe_i32 v16, v13, 0, 16
; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v15, 31, v14
; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v17, 31, v16
+; VI-NO-DS128-NEXT: v_bfe_i32 v13, v0, 0, 16
+; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v0, 16, v5
; VI-NO-DS128-NEXT: ds_write2_b64 v11, v[16:17], v[14:15] offset0:18 offset1:19
-; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v16, 16, v12
-; VI-NO-DS128-NEXT: v_bfe_i32 v15, v12, 0, 16
-; VI-NO-DS128-NEXT: v_bfe_i32 v17, v16, 0, 16
+; VI-NO-DS128-NEXT: v_bfe_i32 v15, v0, 0, 16
+; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v0, 16, v12
+; VI-NO-DS128-NEXT: v_bfe_i32 v17, v12, 0, 16
+; VI-NO-DS128-NEXT: v_bfe_i32 v19, v0, 0, 16
+; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v20, 31, v19
; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v18, 31, v17
-; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v16, 31, v15
-; VI-NO-DS128-NEXT: ds_write2_b64 v11, v[15:16], v[17:18] offset0:16 offset1:17
-; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(9)
-; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v15, 16, v6
-; VI-NO-DS128-NEXT: v_bfe_i32 v15, v15, 0, 16
-; VI-NO-DS128-NEXT: v_bfe_i32 v17, v6, 0, 16
-; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v16, 31, v15
+; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v0, 16, v10
+; VI-NO-DS128-NEXT: ds_write2_b64 v11, v[17:18], v[19:20] offset0:16 offset1:17
+; VI-NO-DS128-NEXT: v_bfe_i32 v17, v0, 0, 16
+; VI-NO-DS128-NEXT: v_bfe_i32 v19, v10, 0, 16
; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v18, 31, v17
-; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v6, 16, v5
-; VI-NO-DS128-NEXT: ds_write2_b64 v11, v[17:18], v[15:16] offset0:14 offset1:15
-; VI-NO-DS128-NEXT: v_bfe_i32 v15, v6, 0, 16
-; VI-NO-DS128-NEXT: v_bfe_i32 v5, v5, 0, 16
-; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v12, 16, v4
-; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v16, 31, v15
-; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v6, 31, v5
-; VI-NO-DS128-NEXT: s_waitcnt lgkmcnt(9)
-; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v0, 16, v8
-; VI-NO-DS128-NEXT: ds_write2_b64 v11, v[5:6], v[15:16] offset0:12 offset1:13
-; VI-NO-DS128-NEXT: v_bfe_i32 v5, v12, 0, 16
-; VI-NO-DS128-NEXT: v_bfe_i32 v15, v4, 0, 16
-; VI-NO-DS128-NEXT: v_bfe_i32 v13, v0, 0, 16
+; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v20, 31, v19
; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v0, 16, v9
-; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v6, 31, v5
-; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v16, 31, v15
-; VI-NO-DS128-NEXT: ds_write2_b64 v11, v[15:16], v[5:6] offset0:10 offset1:11
-; VI-NO-DS128-NEXT: v_bfe_i32 v15, v0, 0, 16
-; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v0, 16, v3
-; VI-NO-DS128-NEXT: v_bfe_i32 v17, v3, 0, 16
-; VI-NO-DS128-NEXT: v_bfe_i32 v21, v0, 0, 16
-; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v19, 16, v10
-; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v22, 31, v21
+; VI-NO-DS128-NEXT: ds_write2_b64 v11, v[19:20], v[17:18] offset0:14 offset1:15
+; VI-NO-DS128-NEXT: v_bfe_i32 v17, v0, 0, 16
+; VI-NO-DS128-NEXT: v_bfe_i32 v9, v9, 0, 16
; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v18, 31, v17
-; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v1, 16, v7
-; VI-NO-DS128-NEXT: v_bfe_i32 v19, v19, 0, 16
-; VI-NO-DS128-NEXT: ds_write2_b64 v11, v[17:18], v[21:22] offset0:8 offset1:9
-; VI-NO-DS128-NEXT: v_bfe_i32 v17, v10, 0, 16
-; VI-NO-DS128-NEXT: v_bfe_i32 v1, v1, 0, 16
-; VI-NO-DS128-NEXT: v_bfe_i32 v4, v7, 0, 16
-; VI-NO-DS128-NEXT: v_bfe_i32 v6, v8, 0, 16
-; VI-NO-DS128-NEXT: v_bfe_i32 v8, v9, 0, 16
-; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v20, 31, v19
+; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v10, 31, v9
+; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v0, 16, v8
+; VI-NO-DS128-NEXT: ds_write2_b64 v11, v[9:10], v[17:18] offset0:12 offset1:13
+; VI-NO-DS128-NEXT: v_bfe_i32 v9, v0, 0, 16
+; VI-NO-DS128-NEXT: v_bfe_i32 v17, v8, 0, 16
+; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v10, 31, v9
+; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v18, 31, v17
+; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v0, 16, v7
+; VI-NO-DS128-NEXT: ds_write2_b64 v11, v[17:18], v[9:10] offset0:10 offset1:11
+; VI-NO-DS128-NEXT: v_bfe_i32 v17, v0, 0, 16
+; VI-NO-DS128-NEXT: v_bfe_i32 v19, v7, 0, 16
+; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v12, 16, v6
; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v18, 31, v17
+; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v20, 31, v19
+; VI-NO-DS128-NEXT: v_lshrrev_b32_e32 v1, 16, v3
+; VI-NO-DS128-NEXT: v_bfe_i32 v8, v12, 0, 16
+; VI-NO-DS128-NEXT: ds_write2_b64 v11, v[19:20], v[17:18] offset0:8 offset1:9
+; VI-NO-DS128-NEXT: v_bfe_i32 v17, v5, 0, 16
+; VI-NO-DS128-NEXT: v_bfe_i32 v5, v6, 0, 16
+; VI-NO-DS128-NEXT: v_bfe_i32 v1, v1, 0, 16
+; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v9, 31, v8
+; VI-NO-DS128-NEXT: v_bfe_i32 v21, v3, 0, 16
+; VI-NO-DS128-NEXT: v_bfe_i32 v3, v4, 0, 16
+; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v6, 31, v5
; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v2, 31, v1
; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v14, 31, v13
; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v16, 31, v15
-; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v5, 31, v4
-; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v7, 31, v6
-; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v9, 31, v8
-; VI-NO-DS128-NEXT: ds_write2_b64 v11, v[17:18], v[19:20] offset0:6 offset1:7
-; VI-NO-DS128-NEXT: ds_write2_b64 v11, v[8:9], v[15:16] offset0:4 offset1:5
-; VI-NO-DS128-NEXT: ds_write2_b64 v11, v[6:7], v[13:14] offset0:2 offset1:3
-; VI-NO-DS128-NEXT: ds_write2_b64 v11, v[4:5], v[1:2] offset1:1
+; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v22, 31, v21
+; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v4, 31, v3
+; VI-NO-DS128-NEXT: v_ashrrev_i32_e32 v18, 31, v17
+; VI-NO-DS128-NEXT: ds_write2_b64 v11, v[5:6], v[8:9] offset0:6 offset1:7
+; VI-NO-DS128-NEXT: ds_write2_b64 v11, v[17:18], v[15:16] offset0:4 offset1:5
+; VI-NO-DS128-NEXT: ds_write2_b64 v11, v[3:4], v[13:14] offset0:2 offset1:3
+; VI-NO-DS128-NEXT: ds_write2_b64 v11, v[21:22], v[1:2] offset1:1
; VI-NO-DS128-NEXT: s_endpgm
;
; GFX9-NO-DS128-LABEL: local_sextload_v32i16_to_v32i64:
; GFX9-NO-DS128: ; %bb.0:
; GFX9-NO-DS128-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v8, s1
-; GFX9-NO-DS128-NEXT: ds_read2_b64 v[4:7], v8 offset0:6 offset1:7
-; GFX9-NO-DS128-NEXT: ds_read2_b64 v[0:3], v8 offset0:4 offset1:5
+; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v11, s1
+; GFX9-NO-DS128-NEXT: ds_read2_b64 v[4:7], v11 offset0:6 offset1:7
+; GFX9-NO-DS128-NEXT: ds_read2_b64 v[0:3], v11 offset0:4 offset1:5
; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v15, s0
; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(1)
-; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v9, 16, v7
-; GFX9-NO-DS128-NEXT: v_bfe_i32 v16, v9, 0, 16
-; GFX9-NO-DS128-NEXT: v_bfe_i32 v18, v7, 0, 16
+; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v8, 16, v7
+; GFX9-NO-DS128-NEXT: v_bfe_i32 v16, v7, 0, 16
+; GFX9-NO-DS128-NEXT: v_bfe_i32 v18, v8, 0, 16
; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v17, 31, v16
; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v19, 31, v18
-; GFX9-NO-DS128-NEXT: ds_read2_b64 v[11:14], v8 offset1:1
-; GFX9-NO-DS128-NEXT: ds_read2_b64 v[7:10], v8 offset0:2 offset1:3
-; GFX9-NO-DS128-NEXT: ds_write2_b64 v15, v[18:19], v[16:17] offset0:30 offset1:31
+; GFX9-NO-DS128-NEXT: ds_read2_b64 v[7:10], v11 offset1:1
+; GFX9-NO-DS128-NEXT: ds_read2_b64 v[11:14], v11 offset0:2 offset1:3
+; GFX9-NO-DS128-NEXT: ds_write2_b64 v15, v[16:17], v[18:19] offset0:30 offset1:31
; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v16, 16, v6
; GFX9-NO-DS128-NEXT: v_bfe_i32 v16, v16, 0, 16
; GFX9-NO-DS128-NEXT: v_bfe_i32 v18, v6, 0, 16
@@ -8296,63 +8276,62 @@ define amdgpu_kernel void @local_sextload_v32i16_to_v32i64(ptr addrspace(3) %out
; GFX9-NO-DS128-NEXT: v_bfe_i32 v16, v1, 0, 16
; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v17, 31, v16
-; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v4, 16, v0
; GFX9-NO-DS128-NEXT: ds_write2_b64 v15, v[16:17], v[2:3] offset0:18 offset1:19
-; GFX9-NO-DS128-NEXT: v_bfe_i32 v3, v0, 0, 16
-; GFX9-NO-DS128-NEXT: v_bfe_i32 v16, v4, 0, 16
+; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v17, 16, v0
+; GFX9-NO-DS128-NEXT: v_bfe_i32 v16, v0, 0, 16
+; GFX9-NO-DS128-NEXT: v_bfe_i32 v18, v17, 0, 16
+; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v19, 31, v18
; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v17, 31, v16
-; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v4, 31, v3
-; GFX9-NO-DS128-NEXT: ds_write2_b64 v15, v[3:4], v[16:17] offset0:16 offset1:17
+; GFX9-NO-DS128-NEXT: ds_write2_b64 v15, v[16:17], v[18:19] offset0:16 offset1:17
; GFX9-NO-DS128-NEXT: s_waitcnt lgkmcnt(8)
-; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v3, 16, v10
-; GFX9-NO-DS128-NEXT: v_bfe_i32 v3, v3, 0, 16
-; GFX9-NO-DS128-NEXT: v_bfe_i32 v16, v10, 0, 16
-; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v4, 31, v3
+; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v16, 16, v14
+; GFX9-NO-DS128-NEXT: v_bfe_i32 v16, v16, 0, 16
+; GFX9-NO-DS128-NEXT: v_bfe_i32 v18, v14, 0, 16
; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v17, 31, v16
-; GFX9-NO-DS128-NEXT: ds_write2_b64 v15, v[16:17], v[3:4] offset0:14 offset1:15
-; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v3, 16, v9
-; GFX9-NO-DS128-NEXT: v_bfe_i32 v3, v3, 0, 16
-; GFX9-NO-DS128-NEXT: v_bfe_i32 v9, v9, 0, 16
-; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v0, 16, v8
-; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v4, 31, v3
-; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v10, 31, v9
-; GFX9-NO-DS128-NEXT: ds_write2_b64 v15, v[9:10], v[3:4] offset0:12 offset1:13
-; GFX9-NO-DS128-NEXT: v_bfe_i32 v3, v0, 0, 16
-; GFX9-NO-DS128-NEXT: v_bfe_i32 v8, v8, 0, 16
-; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v4, 31, v3
-; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v9, 31, v8
-; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v20, 16, v12
-; GFX9-NO-DS128-NEXT: ds_write2_b64 v15, v[8:9], v[3:4] offset0:10 offset1:11
-; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v4, 16, v7
-; GFX9-NO-DS128-NEXT: v_bfe_i32 v1, v20, 0, 16
-; GFX9-NO-DS128-NEXT: v_bfe_i32 v16, v7, 0, 16
-; GFX9-NO-DS128-NEXT: v_bfe_i32 v20, v4, 0, 16
-; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v18, 16, v13
-; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v19, 16, v14
-; GFX9-NO-DS128-NEXT: v_mov_b32_e32 v0, v14
-; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v21, 31, v20
+; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v19, 31, v18
+; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v14, 16, v13
+; GFX9-NO-DS128-NEXT: ds_write2_b64 v15, v[18:19], v[16:17] offset0:14 offset1:15
+; GFX9-NO-DS128-NEXT: v_bfe_i32 v16, v14, 0, 16
+; GFX9-NO-DS128-NEXT: v_bfe_i32 v13, v13, 0, 16
; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v17, 31, v16
-; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v5, 16, v11
-; GFX9-NO-DS128-NEXT: v_bfe_i32 v8, v12, 0, 16
-; GFX9-NO-DS128-NEXT: v_bfe_i32 v12, v18, 0, 16
-; GFX9-NO-DS128-NEXT: v_bfe_i32 v18, v19, 0, 16
-; GFX9-NO-DS128-NEXT: ds_write2_b64 v15, v[16:17], v[20:21] offset0:8 offset1:9
+; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v14, 31, v13
+; GFX9-NO-DS128-NEXT: ds_write2_b64 v15, v[13:14], v[16:17] offset0:12 offset1:13
+; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v13, 16, v12
+; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v0, 16, v10
+; GFX9-NO-DS128-NEXT: v_bfe_i32 v13, v13, 0, 16
+; GFX9-NO-DS128-NEXT: v_bfe_i32 v16, v12, 0, 16
+; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v14, 31, v13
+; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v17, 31, v16
+; GFX9-NO-DS128-NEXT: v_bfe_i32 v12, v0, 0, 16
+; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v0, 16, v11
+; GFX9-NO-DS128-NEXT: ds_write2_b64 v15, v[16:17], v[13:14] offset0:10 offset1:11
; GFX9-NO-DS128-NEXT: v_bfe_i32 v16, v0, 0, 16
-; GFX9-NO-DS128-NEXT: v_bfe_i32 v5, v5, 0, 16
-; GFX9-NO-DS128-NEXT: v_bfe_i32 v3, v11, 0, 16
-; GFX9-NO-DS128-NEXT: v_bfe_i32 v10, v13, 0, 16
-; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v19, 31, v18
+; GFX9-NO-DS128-NEXT: v_bfe_i32 v18, v11, 0, 16
; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v17, 31, v16
+; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v19, 31, v18
+; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v5, 16, v7
+; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v20, 16, v8
+; GFX9-NO-DS128-NEXT: v_lshrrev_b32_e32 v3, 16, v9
+; GFX9-NO-DS128-NEXT: ds_write2_b64 v15, v[18:19], v[16:17] offset0:8 offset1:9
+; GFX9-NO-DS128-NEXT: v_bfe_i32 v16, v9, 0, 16
+; GFX9-NO-DS128-NEXT: v_bfe_i32 v9, v10, 0, 16
+; GFX9-NO-DS128-NEXT: v_bfe_i32 v5, v5, 0, 16
+; GFX9-NO-DS128-NEXT: v_bfe_i32 v1, v20, 0, 16
+; GFX9-NO-DS128-NEXT: v_bfe_i32 v3, v3, 0, 16
+; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v13, 31, v12
+; GFX9-NO-DS128-NEXT: v_bfe_i32 v20, v7, 0, 16
+; GFX9-NO-DS128-NEXT: v_bfe_i32 v7, v8, 0, 16
+; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v10, 31, v9
; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v6, 31, v5
; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v2, 31, v1
-; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v13, 31, v12
; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v4, 31, v3
-; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v9, 31, v8
-; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v11, 31, v10
-; GFX9-NO-DS128-NEXT: ds_write2_b64 v15, v[16:17], v[18:19] offset0:6 offset1:7
-; GFX9-NO-DS128-NEXT: ds_write2_b64 v15, v[10:11], v[12:13] offset0:4 offset1:5
-; GFX9-NO-DS128-NEXT: ds_write2_b64 v15, v[8:9], v[1:2] offset0:2 offset1:3
-; GFX9-NO-DS128-NEXT: ds_write2_b64 v15, v[3:4], v[5:6] offset1:1
+; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v21, 31, v20
+; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v8, 31, v7
+; GFX9-NO-DS128-NEXT: v_ashrrev_i32_e32 v17, 31, v16
+; GFX9-NO-DS128-NEXT: ds_write2_b64 v15, v[9:10], v[12:13] offset0:6 offset1:7
+; GFX9-NO-DS128-NEXT: ds_write2_b64 v15, v[16:17], v[3:4] offset0:4 offset1:5
+; GFX9-NO-DS128-NEXT: ds_write2_b64 v15, v[7:8], v[1:2] offset0:2 offset1:3
+; GFX9-NO-DS128-NEXT: ds_write2_b64 v15, v[20:21], v[5:6] offset1:1
; GFX9-NO-DS128-NEXT: s_endpgm
;
; EG-LABEL: local_sextload_v32i16_to_v32i64:
@@ -8715,11 +8694,10 @@ define amdgpu_kernel void @local_sextload_v32i16_to_v32i64(ptr addrspace(3) %out
; VI-DS128-NEXT: v_bfe_i32 v15, v2, 0, 16
; VI-DS128-NEXT: v_ashrrev_i32_e32 v14, 31, v13
; VI-DS128-NEXT: v_ashrrev_i32_e32 v16, 31, v15
-; VI-DS128-NEXT: v_mov_b32_e32 v2, v3
-; VI-DS128-NEXT: v_lshrrev_b32_e32 v3, 16, v3
+; VI-DS128-NEXT: v_lshrrev_b32_e32 v2, 16, v3
; VI-DS128-NEXT: ds_write_b128 v8, v[13:16] offset:224
-; VI-DS128-NEXT: v_bfe_i32 v13, v2, 0, 16
-; VI-DS128-NEXT: v_bfe_i32 v15, v3, 0, 16
+; VI-DS128-NEXT: v_bfe_i32 v15, v2, 0, 16
+; VI-DS128-NEXT: v_bfe_i32 v13, v3, 0, 16
; VI-DS128-NEXT: v_ashrrev_i32_e32 v14, 31, v13
; VI-DS128-NEXT: v_ashrrev_i32_e32 v16, 31, v15
; VI-DS128-NEXT: v_lshrrev_b32_e32 v2, 16, v0
@@ -8736,44 +8714,38 @@ define amdgpu_kernel void @local_sextload_v32i16_to_v32i64(ptr addrspace(3) %out
; VI-DS128-NEXT: v_lshrrev_b32_e32 v2, 16, v11
; VI-DS128-NEXT: v_bfe_i32 v0, v11, 0, 16
; VI-DS128-NEXT: v_bfe_i32 v2, v2, 0, 16
-; VI-DS128-NEXT: v_ashrrev_i32_e32 v14, 31, v13
-; VI-DS128-NEXT: v_ashrrev_i32_e32 v16, 31, v15
; VI-DS128-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; VI-DS128-NEXT: v_ashrrev_i32_e32 v3, 31, v2
-; VI-DS128-NEXT: ds_write_b128 v8, v[13:16] offset:192
-; VI-DS128-NEXT: v_mov_b32_e32 v13, v12
; VI-DS128-NEXT: ds_write_b128 v8, v[0:3] offset:160
; VI-DS128-NEXT: v_lshrrev_b32_e32 v2, 16, v12
-; VI-DS128-NEXT: v_bfe_i32 v0, v13, 0, 16
+; VI-DS128-NEXT: v_bfe_i32 v0, v12, 0, 16
; VI-DS128-NEXT: v_bfe_i32 v2, v2, 0, 16
; VI-DS128-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; VI-DS128-NEXT: v_ashrrev_i32_e32 v3, 31, v2
-; VI-DS128-NEXT: v_lshrrev_b32_e32 v11, 16, v10
; VI-DS128-NEXT: ds_write_b128 v8, v[0:3] offset:176
-; VI-DS128-NEXT: v_bfe_i32 v0, v9, 0, 16
; VI-DS128-NEXT: v_lshrrev_b32_e32 v2, 16, v9
-; VI-DS128-NEXT: v_bfe_i32 v9, v10, 0, 16
-; VI-DS128-NEXT: v_bfe_i32 v11, v11, 0, 16
-; VI-DS128-NEXT: v_ashrrev_i32_e32 v10, 31, v9
-; VI-DS128-NEXT: v_ashrrev_i32_e32 v12, 31, v11
+; VI-DS128-NEXT: v_bfe_i32 v0, v9, 0, 16
; VI-DS128-NEXT: v_bfe_i32 v2, v2, 0, 16
-; VI-DS128-NEXT: ds_write_b128 v8, v[9:12] offset:144
-; VI-DS128-NEXT: s_waitcnt lgkmcnt(8)
-; VI-DS128-NEXT: v_lshrrev_b32_e32 v11, 16, v19
; VI-DS128-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; VI-DS128-NEXT: v_ashrrev_i32_e32 v3, 31, v2
-; VI-DS128-NEXT: v_bfe_i32 v9, v19, 0, 16
-; VI-DS128-NEXT: v_bfe_i32 v11, v11, 0, 16
; VI-DS128-NEXT: ds_write_b128 v8, v[0:3] offset:128
-; VI-DS128-NEXT: s_waitcnt lgkmcnt(8)
+; VI-DS128-NEXT: s_waitcnt lgkmcnt(6)
; VI-DS128-NEXT: v_bfe_i32 v0, v5, 0, 16
; VI-DS128-NEXT: v_lshrrev_b32_e32 v2, 16, v5
-; VI-DS128-NEXT: v_mov_b32_e32 v5, v20
+; VI-DS128-NEXT: v_lshrrev_b32_e32 v5, 16, v10
+; VI-DS128-NEXT: v_bfe_i32 v9, v10, 0, 16
+; VI-DS128-NEXT: v_bfe_i32 v11, v5, 0, 16
+; VI-DS128-NEXT: v_ashrrev_i32_e32 v10, 31, v9
+; VI-DS128-NEXT: v_ashrrev_i32_e32 v12, 31, v11
+; VI-DS128-NEXT: v_lshrrev_b32_e32 v5, 16, v19
+; VI-DS128-NEXT: ds_write_b128 v8, v[9:12] offset:144
+; VI-DS128-NEXT: v_bfe_i32 v9, v19, 0, 16
+; VI-DS128-NEXT: v_bfe_i32 v11, v5, 0, 16
; VI-DS128-NEXT: v_ashrrev_i32_e32 v10, 31, v9
; VI-DS128-NEXT: v_ashrrev_i32_e32 v12, 31, v11
-; VI-DS128-NEXT: ds_write_b128 v8, v[9:12] offset:96
-; VI-DS128-NEXT: v_bfe_i32 v9, v5, 0, 16
; VI-DS128-NEXT: v_lshrrev_b32_e32 v5, 16, v20
+; VI-DS128-NEXT: ds_write_b128 v8, v[9:12] offset:96
+; VI-DS128-NEXT: v_bfe_i32 v9, v20, 0, 16
; VI-DS128-NEXT: v_bfe_i32 v11, v5, 0, 16
; VI-DS128-NEXT: v_ashrrev_i32_e32 v10, 31, v9
; VI-DS128-NEXT: v_ashrrev_i32_e32 v12, 31, v11
@@ -8783,18 +8755,20 @@ define amdgpu_kernel void @local_sextload_v32i16_to_v32i64(ptr addrspace(3) %out
; VI-DS128-NEXT: v_bfe_i32 v11, v5, 0, 16
; VI-DS128-NEXT: v_ashrrev_i32_e32 v10, 31, v9
; VI-DS128-NEXT: v_ashrrev_i32_e32 v12, 31, v11
-; VI-DS128-NEXT: v_lshrrev_b32_e32 v5, 16, v18
; VI-DS128-NEXT: ds_write_b128 v8, v[9:12] offset:64
; VI-DS128-NEXT: v_bfe_i32 v9, v4, 0, 16
; VI-DS128-NEXT: v_lshrrev_b32_e32 v4, 16, v4
-; VI-DS128-NEXT: v_bfe_i32 v13, v18, 0, 16
-; VI-DS128-NEXT: v_bfe_i32 v15, v5, 0, 16
+; VI-DS128-NEXT: v_ashrrev_i32_e32 v14, 31, v13
+; VI-DS128-NEXT: v_ashrrev_i32_e32 v16, 31, v15
; VI-DS128-NEXT: v_bfe_i32 v11, v4, 0, 16
-; VI-DS128-NEXT: v_mov_b32_e32 v4, v7
+; VI-DS128-NEXT: v_lshrrev_b32_e32 v4, 16, v18
+; VI-DS128-NEXT: ds_write_b128 v8, v[13:16] offset:192
+; VI-DS128-NEXT: v_bfe_i32 v13, v18, 0, 16
+; VI-DS128-NEXT: v_bfe_i32 v15, v4, 0, 16
; VI-DS128-NEXT: v_ashrrev_i32_e32 v14, 31, v13
; VI-DS128-NEXT: v_ashrrev_i32_e32 v16, 31, v15
; VI-DS128-NEXT: ds_write_b128 v8, v[13:16] offset:80
-; VI-DS128-NEXT: v_bfe_i32 v13, v4, 0, 16
+; VI-DS128-NEXT: v_bfe_i32 v13, v7, 0, 16
; VI-DS128-NEXT: v_lshrrev_b32_e32 v4, 16, v7
; VI-DS128-NEXT: v_lshrrev_b32_e32 v7, 16, v6
; VI-DS128-NEXT: v_bfe_i32 v15, v4, 0, 16
@@ -8831,11 +8805,10 @@ define amdgpu_kernel void @local_sextload_v32i16_to_v32i64(ptr addrspace(3) %out
; GFX9-DS128-NEXT: v_bfe_i32 v16, v6, 0, 16
; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v15, 31, v14
; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v17, 31, v16
-; GFX9-DS128-NEXT: v_mov_b32_e32 v6, v7
-; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v7, 16, v7
+; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v6, 16, v7
; GFX9-DS128-NEXT: ds_write_b128 v12, v[14:17] offset:224
-; GFX9-DS128-NEXT: v_bfe_i32 v13, v6, 0, 16
-; GFX9-DS128-NEXT: v_bfe_i32 v15, v7, 0, 16
+; GFX9-DS128-NEXT: v_bfe_i32 v15, v6, 0, 16
+; GFX9-DS128-NEXT: v_bfe_i32 v13, v7, 0, 16
; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v14, 31, v13
; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v16, 31, v15
; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v6, 16, v4
@@ -8851,44 +8824,43 @@ define amdgpu_kernel void @local_sextload_v32i16_to_v32i64(ptr addrspace(3) %out
; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(5)
; GFX9-DS128-NEXT: v_bfe_i32 v4, v2, 0, 16
; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v2, 16, v2
-; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v14, 31, v13
-; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v16, 31, v15
; GFX9-DS128-NEXT: v_bfe_i32 v6, v2, 0, 16
-; GFX9-DS128-NEXT: ds_write_b128 v12, v[13:16] offset:192
-; GFX9-DS128-NEXT: v_mov_b32_e32 v13, v3
; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v5, 31, v4
; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v7, 31, v6
; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v2, 16, v3
; GFX9-DS128-NEXT: ds_write_b128 v12, v[4:7] offset:160
-; GFX9-DS128-NEXT: v_bfe_i32 v4, v13, 0, 16
+; GFX9-DS128-NEXT: v_bfe_i32 v4, v3, 0, 16
; GFX9-DS128-NEXT: v_bfe_i32 v6, v2, 0, 16
; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v5, 31, v4
; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v7, 31, v6
-; GFX9-DS128-NEXT: ds_write_b128 v12, v[4:7] offset:176
-; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v6, 16, v1
; GFX9-DS128-NEXT: v_bfe_i32 v2, v0, 0, 16
; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v0, 16, v0
-; GFX9-DS128-NEXT: v_bfe_i32 v13, v1, 0, 16
-; GFX9-DS128-NEXT: v_bfe_i32 v15, v6, 0, 16
+; GFX9-DS128-NEXT: ds_write_b128 v12, v[4:7] offset:176
; GFX9-DS128-NEXT: v_bfe_i32 v4, v0, 0, 16
-; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v14, 31, v13
-; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v16, 31, v15
-; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(6)
-; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v1, 16, v20
; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v5, 31, v4
+; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(6)
; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v0, 16, v9
-; GFX9-DS128-NEXT: ds_write_b128 v12, v[13:16] offset:144
-; GFX9-DS128-NEXT: v_bfe_i32 v13, v20, 0, 16
-; GFX9-DS128-NEXT: v_bfe_i32 v15, v1, 0, 16
+; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v14, 31, v13
+; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v16, 31, v15
; GFX9-DS128-NEXT: ds_write_b128 v12, v[2:5] offset:128
; GFX9-DS128-NEXT: v_bfe_i32 v4, v0, 0, 16
-; GFX9-DS128-NEXT: v_mov_b32_e32 v0, v21
+; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v0, 16, v1
+; GFX9-DS128-NEXT: ds_write_b128 v12, v[13:16] offset:192
+; GFX9-DS128-NEXT: v_bfe_i32 v13, v1, 0, 16
+; GFX9-DS128-NEXT: v_bfe_i32 v15, v0, 0, 16
+; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v14, 31, v13
+; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v16, 31, v15
+; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(7)
+; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v0, 16, v20
+; GFX9-DS128-NEXT: ds_write_b128 v12, v[13:16] offset:144
+; GFX9-DS128-NEXT: v_bfe_i32 v13, v20, 0, 16
+; GFX9-DS128-NEXT: v_bfe_i32 v15, v0, 0, 16
; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v14, 31, v13
; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v16, 31, v15
-; GFX9-DS128-NEXT: ds_write_b128 v12, v[13:16] offset:96
-; GFX9-DS128-NEXT: v_bfe_i32 v13, v0, 0, 16
; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v0, 16, v21
+; GFX9-DS128-NEXT: ds_write_b128 v12, v[13:16] offset:96
+; GFX9-DS128-NEXT: v_bfe_i32 v13, v21, 0, 16
; GFX9-DS128-NEXT: v_bfe_i32 v15, v0, 0, 16
; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v14, 31, v13
; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v16, 31, v15
@@ -8896,26 +8868,25 @@ define amdgpu_kernel void @local_sextload_v32i16_to_v32i64(ptr addrspace(3) %out
; GFX9-DS128-NEXT: ds_write_b128 v12, v[13:16] offset:112
; GFX9-DS128-NEXT: v_bfe_i32 v13, v18, 0, 16
; GFX9-DS128-NEXT: v_bfe_i32 v15, v0, 0, 16
+; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v0, 16, v8
; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v14, 31, v13
; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v16, 31, v15
-; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v1, 16, v19
-; GFX9-DS128-NEXT: ds_write_b128 v12, v[13:16] offset:64
-; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v0, 16, v8
-; GFX9-DS128-NEXT: v_bfe_i32 v13, v19, 0, 16
-; GFX9-DS128-NEXT: v_bfe_i32 v15, v1, 0, 16
; GFX9-DS128-NEXT: v_bfe_i32 v6, v8, 0, 16
; GFX9-DS128-NEXT: v_bfe_i32 v8, v0, 0, 16
-; GFX9-DS128-NEXT: v_mov_b32_e32 v0, v11
+; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v0, 16, v19
+; GFX9-DS128-NEXT: ds_write_b128 v12, v[13:16] offset:64
+; GFX9-DS128-NEXT: v_bfe_i32 v13, v19, 0, 16
+; GFX9-DS128-NEXT: v_bfe_i32 v15, v0, 0, 16
; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v14, 31, v13
; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v16, 31, v15
-; GFX9-DS128-NEXT: ds_write_b128 v12, v[13:16] offset:80
-; GFX9-DS128-NEXT: v_bfe_i32 v13, v0, 0, 16
; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v0, 16, v11
+; GFX9-DS128-NEXT: ds_write_b128 v12, v[13:16] offset:80
; GFX9-DS128-NEXT: v_bfe_i32 v15, v0, 0, 16
; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v0, 16, v10
; GFX9-DS128-NEXT: v_bfe_i32 v17, v10, 0, 16
; GFX9-DS128-NEXT: v_bfe_i32 v19, v0, 0, 16
; GFX9-DS128-NEXT: v_bfe_i32 v2, v9, 0, 16
+; GFX9-DS128-NEXT: v_bfe_i32 v13, v11, 0, 16
; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v18, 31, v17
; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v20, 31, v19
; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v3, 31, v2
diff --git a/llvm/test/CodeGen/AMDGPU/merge-stores.ll b/llvm/test/CodeGen/AMDGPU/merge-stores.ll
index 2e9d1b4c8f7e5..7457509ffe193 100644
--- a/llvm/test/CodeGen/AMDGPU/merge-stores.ll
+++ b/llvm/test/CodeGen/AMDGPU/merge-stores.ll
@@ -1054,18 +1054,17 @@ define amdgpu_kernel void @merge_global_store_8_constants_i32(ptr addrspace(1) %
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
-; SI-NEXT: v_mov_b32_e32 v0, 34
-; SI-NEXT: v_mov_b32_e32 v1, 0x3e7
-; SI-NEXT: v_mov_b32_e32 v2, 0x41
-; SI-NEXT: v_mov_b32_e32 v3, 33
+; SI-NEXT: v_mov_b32_e32 v2, 34
+; SI-NEXT: v_mov_b32_e32 v3, 0x3e7
+; SI-NEXT: v_mov_b32_e32 v4, 0x41
+; SI-NEXT: v_mov_b32_e32 v5, 33
; SI-NEXT: s_waitcnt lgkmcnt(0)
-; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
+; SI-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0
+; SI-NEXT: v_mov_b32_e32 v0, 0x62
+; SI-NEXT: v_mov_b32_e32 v1, 0x5b
; SI-NEXT: s_waitcnt expcnt(0)
-; SI-NEXT: v_mov_b32_e32 v2, 0x62
-; SI-NEXT: v_mov_b32_e32 v3, 0x5b
-; SI-NEXT: v_mov_b32_e32 v4, 0xd4
-; SI-NEXT: v_mov_b32_e32 v5, v1
-; SI-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:16
+; SI-NEXT: v_mov_b32_e32 v2, 0xd4
+; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
; SI-NEXT: s_endpgm
;
; CI-LABEL: merge_global_store_8_constants_i32:
@@ -1073,17 +1072,16 @@ define amdgpu_kernel void @merge_global_store_8_constants_i32(ptr addrspace(1) %
; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
; CI-NEXT: s_mov_b32 s3, 0xf000
; CI-NEXT: s_mov_b32 s2, -1
-; CI-NEXT: v_mov_b32_e32 v0, 34
-; CI-NEXT: v_mov_b32_e32 v1, 0x3e7
-; CI-NEXT: v_mov_b32_e32 v2, 0x41
-; CI-NEXT: v_mov_b32_e32 v3, 33
+; CI-NEXT: v_mov_b32_e32 v2, 34
+; CI-NEXT: v_mov_b32_e32 v3, 0x3e7
+; CI-NEXT: v_mov_b32_e32 v4, 0x41
+; CI-NEXT: v_mov_b32_e32 v5, 33
; CI-NEXT: s_waitcnt lgkmcnt(0)
-; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
-; CI-NEXT: v_mov_b32_e32 v4, 0xd4
-; CI-NEXT: v_mov_b32_e32 v2, 0x62
-; CI-NEXT: v_mov_b32_e32 v3, 0x5b
-; CI-NEXT: v_mov_b32_e32 v5, v1
-; CI-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 offset:16
+; CI-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0
+; CI-NEXT: v_mov_b32_e32 v0, 0x62
+; CI-NEXT: v_mov_b32_e32 v1, 0x5b
+; CI-NEXT: v_mov_b32_e32 v2, 0xd4
+; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
; CI-NEXT: s_endpgm
store i32 34, ptr addrspace(1) %out, align 4
%idx1 = getelementptr inbounds i32, ptr addrspace(1) %out, i64 1
diff --git a/llvm/test/CodeGen/AMDGPU/mfma-no-register-aliasing.ll b/llvm/test/CodeGen/AMDGPU/mfma-no-register-aliasing.ll
index 122d69c20c49e..c8cc40faf1e84 100644
--- a/llvm/test/CodeGen/AMDGPU/mfma-no-register-aliasing.ll
+++ b/llvm/test/CodeGen/AMDGPU/mfma-no-register-aliasing.ll
@@ -1005,10 +1005,7 @@ define amdgpu_kernel void @test_mfma_f32_4x4x1f32(ptr addrspace(1) %arg) #0 {
; FAST90A-NEXT: v_accvgpr_write_b32 a3, s7
; FAST90A-NEXT: s_nop 1
; FAST90A-NEXT: v_mfma_f32_4x4x1f32 a[0:3], v1, v2, a[0:3]
-; FAST90A-NEXT: v_mfma_f32_4x4x1f32 a[4:7], v1, v2, a[0:3]
-; FAST90A-NEXT: s_nop 4
-; FAST90A-NEXT: v_accvgpr_mov_b32 a2, a4
-; FAST90A-NEXT: v_accvgpr_mov_b32 a3, a5
+; FAST90A-NEXT: v_mfma_f32_4x4x1f32 a[2:5], v1, v2, a[0:3]
; FAST90A-NEXT: s_nop 1
; FAST90A-NEXT: v_mfma_f32_4x4x1f32 a[0:3], v1, v2, a[0:3]
; FAST90A-NEXT: s_nop 4
diff --git a/llvm/test/CodeGen/AMDGPU/limit-coalesce.mir b/llvm/test/CodeGen/AMDGPU/no-limit-coalesce.mir
similarity index 86%
rename from llvm/test/CodeGen/AMDGPU/limit-coalesce.mir
rename to llvm/test/CodeGen/AMDGPU/no-limit-coalesce.mir
index a245c475638f2..934a536edb726 100644
--- a/llvm/test/CodeGen/AMDGPU/limit-coalesce.mir
+++ b/llvm/test/CodeGen/AMDGPU/no-limit-coalesce.mir
@@ -1,25 +1,22 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
# RUN: llc -mtriple=amdgcn -run-pass register-coalescer -o - %s | FileCheck %s
-# Check that coalescer does not create wider register tuple than in
-# source.
-# No more registers shall be defined
+# Check that coalescer may create wider register tuple than in source.
---
-name: limit_coalesce
+name: no_limit_coalesce
tracksRegLiveness: true
body: |
bb.0:
liveins: $sgpr16, $sgpr17
- ; CHECK-LABEL: name: limit_coalesce
+ ; CHECK-LABEL: name: no_limit_coalesce
; CHECK: liveins: $sgpr16, $sgpr17
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub1:sgpr_64 = COPY $sgpr17
; CHECK-NEXT: [[COPY:%[0-9]+]].sub0:sgpr_64 = COPY $sgpr16
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; CHECK-NEXT: INLINEASM &"; def $0", 0 /* attdialect */, 2818058 /* regdef:VReg_64 */, def %4
- ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0:vreg_128 = COPY %4.sub1
- ; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR [[V_MOV_B32_e32_]], [[COPY1]], [[COPY]], 0, 0, implicit $exec :: (store (s128), addrspace 1)
+ ; CHECK-NEXT: INLINEASM &"; def $0", 0 /* attdialect */, 2818058 /* regdef:VReg_64 */, def undef %5.sub0_sub1
+ ; CHECK-NEXT: GLOBAL_STORE_DWORDX4_SADDR [[V_MOV_B32_e32_]], %5.sub1_sub2_sub3_sub4, [[COPY]], 0, 0, implicit $exec :: (store (s128), addrspace 1)
; CHECK-NEXT: SI_RETURN
%0:sgpr_32 = COPY killed $sgpr17
%1:sgpr_32 = COPY killed $sgpr16
@@ -52,7 +49,6 @@ body: |
; CHECK-NEXT: FLAT_STORE_DWORDX3 $vgpr0_vgpr1, [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr
; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_128 = IMPLICIT_DEF
; CHECK-NEXT: undef [[COPY2:%[0-9]+]].sub0_sub1_sub2:vreg_128 = COPY [[DEF1]]
- ; CHECK-NEXT: [[COPY2:%[0-9]+]].sub3:vreg_128 = COPY undef [[COPY]].sub2
; CHECK-NEXT: FLAT_STORE_DWORDX4 $vgpr0_vgpr1, [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr
%2:vreg_64 = IMPLICIT_DEF
undef %3.sub0:vreg_64 = COPY $sgpr0
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-trackers.ll b/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-trackers.ll
index a29dc34c56d3a..71981e3599b87 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-trackers.ll
+++ b/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-trackers.ll
@@ -20,7 +20,7 @@
; CHECK-LABEL: {{^}}call_72xi32:
-; GFX11-PAL: NumSgprs: 37
+; GFX11-PAL: NumSgprs: 40
; GFX11-PAL-GCNTRACKERS: NumSgprs: 37
; GFX11-PAL: NumVgprs: 64
; GFX11-PAL-GCNTRACKERS: NumVgprs: 64
diff --git a/llvm/test/CodeGen/AMDGPU/scratch-simple.ll b/llvm/test/CodeGen/AMDGPU/scratch-simple.ll
index 840916aa63949..c253f42e0d3c8 100644
--- a/llvm/test/CodeGen/AMDGPU/scratch-simple.ll
+++ b/llvm/test/CodeGen/AMDGPU/scratch-simple.ll
@@ -596,88 +596,84 @@ define amdgpu_ps float @ps_main(i32 %idx) {
; GFX9-FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s0, s2
; GFX9-FLATSCR-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX9-FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s1, 0
-; GFX9-FLATSCR-NEXT: v_and_b32_e32 v23, 0x1fc, v0
+; GFX9-FLATSCR-NEXT: v_and_b32_e32 v30, 0x1fc, v0
; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xbf20e7f4
; GFX9-FLATSCR-NEXT: s_mov_b32 s0, 0
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, 0x3f523be1
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3f3d349e
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v12, 0x3eae29dc
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v13, 0x3eae29d8
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3e319356
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v15, 0x3e31934f
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v27, 0x3f5f2ee2
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v28, 0x3f523be1
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v29, 0x3f3d349e
; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:320
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[12:15], s0 offset:240
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v7
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, v6
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, v5
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v15, 0xbf3d349e
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v16, 0xbf20e7f5
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v17, 0x3efcd89f
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, 0x3efcd89c
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v17, 0xbeae29dc
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v29
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, v28
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, v27
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v11, 0xbf3d349e
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v13, 0x3efcd89f
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3efcd89c
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v22, 0xbf5f2ee3
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v25, 0xbf523be3
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v26, 0x3f638e37
; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:288
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, 0xb702e758
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xbeae29dc
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, 0xb7043519
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, 0xbe31934f
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v11, 0xbe319356
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[15:18], s0 offset:224
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, 0xbf523be3
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, 0xbf5f2ee3
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, 0x3f638e37
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, 0xbefcd89f
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, 0xbefcd8a3
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v0
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:256
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v19, 0xbf638e39
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v18
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, 0x3f20e7f5
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, v15
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v21
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[4:7], s0 offset:304
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:272
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:208
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:192
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v23
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, v18
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v4
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v22, v5
-; GFX9-FLATSCR-NEXT: scratch_load_dword v13, v1, off
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, 0x3f20e7f4
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[19:22], s0 offset:768
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, 0xbf523be1
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, 0x3f3d349c
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v19, v6
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v15
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], s0 offset:832
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:784
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v7
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee2
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, v5
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, v17
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, 0x3703c499
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, v14
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, v12
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, v17
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:752
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:736
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[3:6], s0 offset:816
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, v16
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, v2
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v0
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, v11
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, v3
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, v14
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, v12
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v17, v2
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, v0
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:800
-; GFX9-FLATSCR-NEXT: v_add_u32_e32 v1, 0x200, v23
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[4:7], s0 offset:720
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[15:18], s0 offset:704
-; GFX9-FLATSCR-NEXT: scratch_load_dword v0, v1, off
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v19, 0xbefcd89f
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, 0xbefcd8a3
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, v17
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xb702e758
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, 0xb7043519
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, 0xbe31934f
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, 0xbe319356
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, 0x3eae29dc
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3eae29d8
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, 0x3e319356
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, 0x3e31934f
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[11:14], s0 offset:224
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v23, 0xbf638e39
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v24, v22
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3f20e7f5
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v15, v11
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v16, v25
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[26:29], s0 offset:304
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[17:20], s0 offset:272
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:256
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s0 offset:240
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[22:25], s0 offset:208
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[13:16], s0 offset:192
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, v30
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3703c499
+; GFX9-FLATSCR-NEXT: scratch_load_dword v31, v0, off
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, v7
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, v8
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, 0x3f20e7f4
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, v3
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[3:6], s0 offset:720
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:832
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v16, v29
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xbf523be1
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, 0x3f3d349c
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v28
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, v11
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, v6
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v13
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v24, v26
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v25, v22
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v26, v27
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:784
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, v12
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, 0xbf5f2ee2
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, v27
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v17
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[13:16], s0 offset:736
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:816
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v13, v19
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v14, v17
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], s0 offset:768
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:752
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:800
+; GFX9-FLATSCR-NEXT: v_add_u32_e32 v0, 0x200, v30
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[11:14], s0 offset:704
+; GFX9-FLATSCR-NEXT: scratch_load_dword v0, v0, off
; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: v_add_f32_e32 v0, v13, v0
+; GFX9-FLATSCR-NEXT: v_add_f32_e32 v0, v31, v0
; GFX9-FLATSCR-NEXT: ; return to shader part epilog
;
; GFX10-FLATSCR-LABEL: ps_main:
@@ -686,89 +682,85 @@ define amdgpu_ps float @ps_main(i32 %idx) {
; GFX10-FLATSCR-NEXT: s_addc_u32 s1, s1, 0
; GFX10-FLATSCR-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
; GFX10-FLATSCR-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v6, 0x3f5f2ee2
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v8, 0x3f3d349e
; GFX10-FLATSCR-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3f523be1
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v9, 0xbeae29dc
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v5, 0x3f638e37
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:320
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, v8
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v27, 0x3f5f2ee2
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v29, 0x3f3d349e
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v28, 0x3f523be1
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v17, 0xbeae29dc
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v26, 0x3f638e37
; GFX10-FLATSCR-NEXT: v_and_b32_e32 v0, 0x1fc, v0
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v3, v7
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v4, v6
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v11, 0xbefcd89f
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v12, 0xbefcd8a3
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v10, v9
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v16, 0xbf3d349e
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee3
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v23, 0xbf523be3
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[5:8], off offset:304
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:320
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, v29
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v3, v28
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v4, v27
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v22, 0xbf5f2ee3
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v19, 0xbefcd89f
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v20, 0xbefcd8a3
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v18, v17
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v11, 0xbf3d349e
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v13, 0x3efcd89f
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3efcd89c
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v25, 0xbf523be3
+; GFX10-FLATSCR-NEXT: v_add_nc_u32_e32 v34, 0x200, v0
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v5, v0
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[26:29], off offset:304
; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:288
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[9:12], off offset:272
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v12, 0x3eae29dc
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3e319356
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v18, 0x3efcd89f
-; GFX10-FLATSCR-NEXT: v_add_nc_u32_e32 v39, 0x200, v0
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v35, v0
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[17:20], off offset:272
; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xb702e758
; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, 0xb7043519
; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, 0xbe31934f
; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v3, 0xbe319356
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v17, 0xbf20e7f5
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v13, 0x3eae29d8
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v15, 0x3e31934f
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v19, 0x3efcd89c
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v21, 0xbf638e39
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v22, v20
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v24, 0x3f20e7f5
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v25, v16
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v26, v23
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v4, 0x3f20e7f4
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v27, 0x3703c499
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v28, v14
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v29, v12
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v30, v18
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v6, 0x3eae29dc
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3eae29d8
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v8, 0x3e319356
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v9, 0x3e31934f
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v23, 0xbf638e39
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v24, v22
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[11:14], off offset:224
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3f20e7f5
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v15, v11
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v16, v25
; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:256
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[12:15], off offset:240
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:224
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], off offset:208
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:192
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v0, v17
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, v11
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, v9
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v31, 0xbf523be1
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v34, 0x3f3d349c
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v32, v7
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v33, v16
-; GFX10-FLATSCR-NEXT: scratch_load_dword v10, v35, off
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v35, v21
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v36, v5
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v37, v20
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v38, v6
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[4:7], off offset:832
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[27:30], off offset:816
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:800
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:784
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v33, 0xbf5f2ee2
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v34, v6
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v23, v18
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v26, v8
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v0, v3
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, v27
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, v14
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v3, v12
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v18, v11
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v19, v9
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[35:38], off offset:768
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:752
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:736
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:720
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:704
-; GFX10-FLATSCR-NEXT: scratch_load_dword v0, v39, off
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], off offset:240
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[22:25], off offset:208
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[13:16], off offset:192
+; GFX10-FLATSCR-NEXT: scratch_load_dword v1, v5, off
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v30, 0xbf523be1
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v33, 0x3f3d349c
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v31, v28
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v32, v11
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3703c499
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v16, v29
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v0, 0x3f20e7f4
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v9, v6
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v10, v13
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v18, v12
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v20, v17
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v21, v3
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v24, v26
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v25, v22
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v26, v27
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[30:33], off offset:784
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v32, 0xbf5f2ee2
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v33, v27
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v4, v7
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v5, v8
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[13:16], off offset:736
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v13, v19
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v14, v17
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[7:10], off offset:816
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], off offset:800
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:768
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[30:33], off offset:752
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[3:6], off offset:720
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[11:14], off offset:704
+; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:832
+; GFX10-FLATSCR-NEXT: scratch_load_dword v0, v34, off
; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX10-FLATSCR-NEXT: v_add_f32_e32 v0, v10, v0
+; GFX10-FLATSCR-NEXT: v_add_f32_e32 v0, v1, v0
; GFX10-FLATSCR-NEXT: ; return to shader part epilog
;
; GFX9-FLATSCR-PAL-LABEL: ps_main:
@@ -777,92 +769,88 @@ define amdgpu_ps float @ps_main(i32 %idx) {
; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s2, s0
; GFX9-FLATSCR-PAL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
; GFX9-FLATSCR-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-FLATSCR-PAL-NEXT: v_and_b32_e32 v23, 0x1fc, v0
+; GFX9-FLATSCR-PAL-NEXT: v_and_b32_e32 v30, 0x1fc, v0
; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xbf20e7f4
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v27, 0x3f5f2ee2
; GFX9-FLATSCR-PAL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-FLATSCR-PAL-NEXT: s_and_b32 s3, s3, 0xffff
; GFX9-FLATSCR-PAL-NEXT: s_add_u32 flat_scratch_lo, s2, s0
; GFX9-FLATSCR-PAL-NEXT: s_addc_u32 flat_scratch_hi, s3, 0
; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s0, 0
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, 0x3f523be1
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, 0x3f3d349e
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0x3eae29dc
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v13, 0x3eae29d8
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, 0x3e319356
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, 0x3e31934f
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v28, 0x3f523be1
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v29, 0x3f3d349e
; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:320
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[12:15], s0 offset:240
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v7
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v6
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v5
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, 0xbf3d349e
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v16, 0xbf20e7f5
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, 0x3efcd89f
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0x3efcd89c
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, 0xbeae29dc
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v29
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v28
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v27
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v11, 0xbf3d349e
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v13, 0x3efcd89f
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, 0x3efcd89c
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v22, 0xbf5f2ee3
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v25, 0xbf523be3
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v26, 0x3f638e37
; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:288
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0xb702e758
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xbeae29dc
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, 0xb7043519
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, 0xbe31934f
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v11, 0xbe319356
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[15:18], s0 offset:224
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, 0xbf523be3
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0xbf5f2ee3
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, 0x3f638e37
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, 0xbefcd89f
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0xbefcd8a3
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v0
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:256
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, 0xbf638e39
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v18
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0x3f20e7f5
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, v15
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v21
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[4:7], s0 offset:304
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:272
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:208
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:192
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v23
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, v18
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v4
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v22, v5
-; GFX9-FLATSCR-PAL-NEXT: scratch_load_dword v13, v1, off
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0x3f20e7f4
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[19:22], s0 offset:768
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0xbf523be1
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, 0x3f3d349c
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, v6
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v15
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], s0 offset:832
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:784
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v7
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee2
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, v5
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, v17
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0x3703c499
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v14
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, v12
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, v17
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:752
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:736
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[3:6], s0 offset:816
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, v16
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, v2
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v0
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v11
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, v3
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, v14
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, v12
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, v2
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, v0
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:800
-; GFX9-FLATSCR-PAL-NEXT: v_add_u32_e32 v1, 0x200, v23
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[4:7], s0 offset:720
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[15:18], s0 offset:704
-; GFX9-FLATSCR-PAL-NEXT: scratch_load_dword v0, v1, off
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, 0xbefcd89f
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, 0xbefcd8a3
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, v17
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xb702e758
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0xb7043519
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, 0xbe31934f
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0xbe319356
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, 0x3eae29dc
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, 0x3eae29d8
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0x3e319356
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, 0x3e31934f
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[11:14], s0 offset:224
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v23, 0xbf638e39
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v24, v22
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, 0x3f20e7f5
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, v11
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v16, v25
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[26:29], s0 offset:304
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[17:20], s0 offset:272
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:256
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[6:9], s0 offset:240
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[22:25], s0 offset:208
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[13:16], s0 offset:192
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, v30
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, 0x3703c499
+; GFX9-FLATSCR-PAL-NEXT: scratch_load_dword v31, v0, off
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v7
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, v8
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0x3f20e7f4
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, v3
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[3:6], s0 offset:720
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:832
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v16, v29
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xbf523be1
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0x3f3d349c
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v28
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v11
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, v6
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v13
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v24, v26
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v25, v22
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v26, v27
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:784
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, v12
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, 0xbf5f2ee2
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v27
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v17
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[13:16], s0 offset:736
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:816
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v13, v19
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, v17
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[23:26], s0 offset:768
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:752
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:800
+; GFX9-FLATSCR-PAL-NEXT: v_add_u32_e32 v0, 0x200, v30
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[11:14], s0 offset:704
+; GFX9-FLATSCR-PAL-NEXT: scratch_load_dword v0, v0, off
; GFX9-FLATSCR-PAL-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-PAL-NEXT: v_add_f32_e32 v0, v13, v0
+; GFX9-FLATSCR-PAL-NEXT: v_add_f32_e32 v0, v31, v0
; GFX9-FLATSCR-PAL-NEXT: ; return to shader part epilog
;
; GFX10-FLATSCR-PAL-LABEL: ps_main:
@@ -876,165 +864,158 @@ define amdgpu_ps float @ps_main(i32 %idx) {
; GFX10-FLATSCR-PAL-NEXT: s_addc_u32 s3, s3, 0
; GFX10-FLATSCR-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2
; GFX10-FLATSCR-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, 0x3f5f2ee2
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0x3f3d349e
; GFX10-FLATSCR-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, 0x3f523be1
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, 0xbeae29dc
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, 0x3f638e37
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:320
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v8
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v27, 0x3f5f2ee2
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v29, 0x3f3d349e
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v28, 0x3f523be1
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, 0xbeae29dc
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v26, 0x3f638e37
; GFX10-FLATSCR-PAL-NEXT: v_and_b32_e32 v0, 0x1fc, v0
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v7
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v6
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v11, 0xbefcd89f
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0xbefcd8a3
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v9
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v16, 0xbf3d349e
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee3
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v23, 0xbf523be3
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[5:8], off offset:304
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:320
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v29
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v28
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v27
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v22, 0xbf5f2ee3
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, 0xbefcd89f
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, 0xbefcd8a3
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, v17
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v11, 0xbf3d349e
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v13, 0x3efcd89f
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, 0x3efcd89c
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v25, 0xbf523be3
+; GFX10-FLATSCR-PAL-NEXT: v_add_nc_u32_e32 v34, 0x200, v0
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, v0
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[26:29], off offset:304
; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:288
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[9:12], off offset:272
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0x3eae29dc
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, 0x3e319356
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0x3efcd89f
-; GFX10-FLATSCR-PAL-NEXT: v_add_nc_u32_e32 v39, 0x200, v0
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v35, v0
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[17:20], off offset:272
; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xb702e758
; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0xb7043519
; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, 0xbe31934f
; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0xbe319356
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, 0xbf20e7f5
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v13, 0x3eae29d8
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, 0x3e31934f
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, 0x3efcd89c
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, 0xbf638e39
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v22, v20
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v24, 0x3f20e7f5
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v25, v16
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v26, v23
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, 0x3f20e7f4
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v27, 0x3703c499
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v28, v14
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v29, v12
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v30, v18
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, 0x3eae29dc
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, 0x3eae29d8
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0x3e319356
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, 0x3e31934f
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v23, 0xbf638e39
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v24, v22
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[11:14], off offset:224
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, 0x3f20e7f5
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, v11
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v16, v25
; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:256
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[12:15], off offset:240
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:224
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[20:23], off offset:208
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:192
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, v17
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v11
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v9
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v31, 0xbf523be1
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v34, 0x3f3d349c
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v32, v7
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v33, v16
-; GFX10-FLATSCR-PAL-NEXT: scratch_load_dword v10, v35, off
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v35, v21
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v36, v5
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v37, v20
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v38, v6
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[4:7], off offset:832
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[27:30], off offset:816
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:800
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:784
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v33, 0xbf5f2ee2
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v34, v6
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v23, v18
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v26, v8
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, v3
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v27
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v14
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v12
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, v11
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, v9
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[35:38], off offset:768
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:752
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:736
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:720
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:704
-; GFX10-FLATSCR-PAL-NEXT: scratch_load_dword v0, v39, off
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[6:9], off offset:240
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[22:25], off offset:208
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[13:16], off offset:192
+; GFX10-FLATSCR-PAL-NEXT: scratch_load_dword v1, v5, off
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v30, 0xbf523be1
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v33, 0x3f3d349c
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v31, v28
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v32, v11
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, 0x3703c499
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v16, v29
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0x3f20e7f4
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, v6
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v13
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, v12
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v17
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, v3
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v24, v26
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v25, v22
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v26, v27
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[30:33], off offset:784
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v32, 0xbf5f2ee2
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v33, v27
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v7
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, v8
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[13:16], off offset:736
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v13, v19
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, v17
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[7:10], off offset:816
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], off offset:800
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:768
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[30:33], off offset:752
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[3:6], off offset:720
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[11:14], off offset:704
; GFX10-FLATSCR-PAL-NEXT: s_waitcnt vmcnt(0)
-; GFX10-FLATSCR-PAL-NEXT: v_add_f32_e32 v0, v10, v0
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:832
+; GFX10-FLATSCR-PAL-NEXT: scratch_load_dword v0, v34, off
+; GFX10-FLATSCR-PAL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-PAL-NEXT: v_add_f32_e32 v0, v1, v0
; GFX10-FLATSCR-PAL-NEXT: ; return to shader part epilog
;
; GFX11-FLATSCR-LABEL: ps_main:
; GFX11-FLATSCR: ; %bb.0:
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v1, 0xbf20e7f4 :: v_dual_lshlrev_b32 v0, 2, v0
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v28, 0x3703c499
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v19, 0x3efcd89f
; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v21, 0xbf5f2ee3
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v17, 0xbf3d349e
-; GFX11-FLATSCR-NEXT: v_and_b32_e32 v37, 0x1fc, v0
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xbeae29dc
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v1, 0xbf20e7f4 :: v_dual_lshlrev_b32 v0, 2, v0
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v28, 0x3f3d349e
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3eae29d8
+; GFX11-FLATSCR-NEXT: s_delay_alu instid0(VALU_DEP_4)
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v23, v21
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v27, 0x3f523be1
; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[1:4], off offset:320
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v23, v21 :: v_dual_mov_b32 v8, 0x3f3d349e
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3f523be1
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v24, 0xbf523be3
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v26, v17
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v12, 0xbe319356 :: v_dual_mov_b32 v31, v19
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v2, v8
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v6, 0x3f5f2ee2 :: v_dual_mov_b32 v3, v7
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v5, 0x3f638e37
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v15, 0x3e319356
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v2, v28
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v26, 0x3f5f2ee2
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v19, 0xbefcd89f :: v_dual_mov_b32 v30, v27
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v3, v27
; GFX11-FLATSCR-NEXT: s_delay_alu instid0(VALU_DEP_3)
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v27, v24 :: v_dual_mov_b32 v4, v6
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v25, 0x3f638e37 :: v_dual_mov_b32 v4, v26
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v24, 0xbf523be3
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v17, 0xbeae29dc
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v11, 0xbf3d349e
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v6, 0x3eae29dc
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v8, 0x3e319356
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v9, 0x3e31934f
; GFX11-FLATSCR-NEXT: s_clause 0x1
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[5:8], off offset:304
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[25:28], off offset:304
; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[1:4], off offset:288
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v2, 0xbefcd89f :: v_dual_mov_b32 v1, v0
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v13, 0x3eae29dc :: v_dual_mov_b32 v34, v5
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v3, 0xbefcd8a3
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v13, 0x3efcd89f :: v_dual_mov_b32 v16, v24
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v3, 0xbe319356
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3efcd89c
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v18, v17 :: v_dual_and_b32 v33, 0x1fc, v0
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v20, 0xbefcd8a3 :: v_dual_mov_b32 v15, v11
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v1, 0xb7043519 :: v_dual_mov_b32 v10, v13
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v2, 0xbe31934f :: v_dual_mov_b32 v31, v11
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xb702e758
; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v22, 0xbf638e39
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v25, 0x3f20e7f5
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v9, 0xb702e758
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v11, 0xbe31934f :: v_dual_mov_b32 v36, v6
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v10, 0xb7043519 :: v_dual_mov_b32 v29, v15
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v18, 0xbf20e7f5
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3eae29d8
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v16, 0x3e31934f
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v30, v13
-; GFX11-FLATSCR-NEXT: s_clause 0x1
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[0:3], off offset:272
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[9:12], off offset:256
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v1, 0x3f20e7f4
; GFX11-FLATSCR-NEXT: s_clause 0x1
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[6:9], off offset:240
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[11:14], off offset:224
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v14, 0x3f20e7f5 :: v_dual_mov_b32 v9, v6
+; GFX11-FLATSCR-NEXT: s_clause 0x3
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[17:20], off offset:272
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[0:3], off offset:256
; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[21:24], off offset:208
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[24:27], off offset:192
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v27, v8 :: v_dual_mov_b32 v20, 0x3efcd89c
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v33, v22
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v9, v18
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v10, v2 :: v_dual_mov_b32 v11, v0
-; GFX11-FLATSCR-NEXT: s_clause 0x1
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[13:16], off offset:240
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[17:20], off offset:224
-; GFX11-FLATSCR-NEXT: scratch_load_b32 v14, v37, off
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v32, 0x3f3d349c :: v_dual_mov_b32 v5, v15
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v20, v0
-; GFX11-FLATSCR-NEXT: s_clause 0x2
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[1:4], off offset:832
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[28:31], off offset:816
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[9:12], off offset:800
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v29, 0xbf523be1 :: v_dual_mov_b32 v30, v7
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v31, v17
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v3, v12
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v4, v28
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v24, v19 :: v_dual_mov_b32 v35, v21
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[13:16], off offset:192
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v4, 0x3f20e7f4
+; GFX11-FLATSCR-NEXT: scratch_load_b32 v0, v33, off
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v29, 0xbf523be1
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v32, 0x3f3d349c
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v24, v21
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[4:7], off offset:832
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v7, 0x3703c499 :: v_dual_mov_b32 v16, v28
; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[29:32], off offset:784
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v31, 0xbf5f2ee2 :: v_dual_mov_b32 v32, v6
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v6, v13 :: v_dual_mov_b32 v19, v2
-; GFX11-FLATSCR-NEXT: s_clause 0x4
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[33:36], off offset:768
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v31, 0xbf5f2ee2 :: v_dual_mov_b32 v32, v26
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v23, v25
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v25, v26 :: v_dual_mov_b32 v18, v12
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v20, v17 :: v_dual_mov_b32 v21, v3
+; GFX11-FLATSCR-NEXT: s_clause 0x1
; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[29:32], off offset:752
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[24:27], off offset:736
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[13:16], off offset:736
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v5, v8 :: v_dual_mov_b32 v14, v17
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v4, v7
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v13, v19
+; GFX11-FLATSCR-NEXT: s_clause 0x4
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[7:10], off offset:816
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[18:21], off offset:800
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[22:25], off offset:768
; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[3:6], off offset:720
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[17:20], off offset:704
-; GFX11-FLATSCR-NEXT: scratch_load_b32 v0, v37, off offset:512
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[11:14], off offset:704
+; GFX11-FLATSCR-NEXT: scratch_load_b32 v1, v33, off offset:512
; GFX11-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX11-FLATSCR-NEXT: v_add_f32_e32 v0, v14, v0
+; GFX11-FLATSCR-NEXT: v_add_f32_e32 v0, v0, v1
; GFX11-FLATSCR-NEXT: ; return to shader part epilog
%v1 = extractelement <81 x float> <float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0xBFEA477C60000000, float 0xBFEBE5DC60000000, float 0xBFEC71C720000000, float 0xBFEBE5DC60000000, float 0xBFEA477C60000000, float 0xBFE7A693C0000000, float 0xBFE41CFEA0000000, float 0x3FDF9B13E0000000, float 0x3FDF9B1380000000, float 0x3FD5C53B80000000, float 0x3FD5C53B00000000, float 0x3FC6326AC0000000, float 0x3FC63269E0000000, float 0xBEE05CEB00000000, float 0xBEE086A320000000, float 0xBFC63269E0000000, float 0xBFC6326AC0000000, float 0xBFD5C53B80000000, float 0xBFD5C53B80000000, float 0xBFDF9B13E0000000, float 0xBFDF9B1460000000, float 0xBFE41CFE80000000, float 0x3FE7A693C0000000, float 0x3FEA477C20000000, float 0x3FEBE5DC40000000, float 0x3FEC71C6E0000000, float 0x3FEBE5DC40000000, float 0x3FEA477C20000000, float 0x3FE7A693C0000000, float 0xBFE41CFE80000000>, i32 %idx
%v2 = extractelement <81 x float> <float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0x3FE7A693C0000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFEBE5DC40000000, float 0x3FEBE5DC40000000, float 0xBFEC71C720000000, float 0x3FEC71C6E0000000, float 0xBFEBE5DC60000000, float 0x3FEBE5DC40000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFE7A693C0000000, float 0x3FE7A69380000000, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFE80000000>, i32 %idx
@@ -1619,88 +1600,84 @@ define amdgpu_vs float @vs_main(i32 %idx) {
; GFX9-FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s0, s2
; GFX9-FLATSCR-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX9-FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s1, 0
-; GFX9-FLATSCR-NEXT: v_and_b32_e32 v23, 0x1fc, v0
+; GFX9-FLATSCR-NEXT: v_and_b32_e32 v30, 0x1fc, v0
; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xbf20e7f4
; GFX9-FLATSCR-NEXT: s_mov_b32 s0, 0
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, 0x3f523be1
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3f3d349e
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v12, 0x3eae29dc
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v13, 0x3eae29d8
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3e319356
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v15, 0x3e31934f
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v27, 0x3f5f2ee2
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v28, 0x3f523be1
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v29, 0x3f3d349e
; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:320
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[12:15], s0 offset:240
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v7
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, v6
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, v5
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v15, 0xbf3d349e
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v16, 0xbf20e7f5
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v17, 0x3efcd89f
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, 0x3efcd89c
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v17, 0xbeae29dc
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v29
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, v28
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, v27
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v11, 0xbf3d349e
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v13, 0x3efcd89f
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3efcd89c
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v22, 0xbf5f2ee3
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v25, 0xbf523be3
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v26, 0x3f638e37
; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:288
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, 0xb702e758
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xbeae29dc
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, 0xb7043519
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, 0xbe31934f
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v11, 0xbe319356
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[15:18], s0 offset:224
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, 0xbf523be3
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, 0xbf5f2ee3
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, 0x3f638e37
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, 0xbefcd89f
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, 0xbefcd8a3
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v0
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:256
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v19, 0xbf638e39
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v18
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, 0x3f20e7f5
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, v15
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v21
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[4:7], s0 offset:304
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:272
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:208
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:192
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v23
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, v18
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v4
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v22, v5
-; GFX9-FLATSCR-NEXT: scratch_load_dword v13, v1, off
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, 0x3f20e7f4
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[19:22], s0 offset:768
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, 0xbf523be1
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, 0x3f3d349c
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v19, v6
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v15
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], s0 offset:832
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:784
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v7
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee2
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, v5
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, v17
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, 0x3703c499
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, v14
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, v12
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, v17
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:752
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:736
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[3:6], s0 offset:816
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, v16
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, v2
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v0
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, v11
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, v3
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, v14
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, v12
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v17, v2
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, v0
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:800
-; GFX9-FLATSCR-NEXT: v_add_u32_e32 v1, 0x200, v23
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[4:7], s0 offset:720
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[15:18], s0 offset:704
-; GFX9-FLATSCR-NEXT: scratch_load_dword v0, v1, off
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v19, 0xbefcd89f
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, 0xbefcd8a3
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, v17
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xb702e758
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, 0xb7043519
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, 0xbe31934f
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, 0xbe319356
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, 0x3eae29dc
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3eae29d8
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, 0x3e319356
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, 0x3e31934f
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[11:14], s0 offset:224
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v23, 0xbf638e39
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v24, v22
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3f20e7f5
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v15, v11
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v16, v25
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[26:29], s0 offset:304
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[17:20], s0 offset:272
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:256
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s0 offset:240
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[22:25], s0 offset:208
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[13:16], s0 offset:192
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, v30
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3703c499
+; GFX9-FLATSCR-NEXT: scratch_load_dword v31, v0, off
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, v7
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, v8
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, 0x3f20e7f4
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, v3
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[3:6], s0 offset:720
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:832
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v16, v29
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xbf523be1
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, 0x3f3d349c
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v28
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, v11
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, v6
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v13
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v24, v26
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v25, v22
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v26, v27
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:784
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, v12
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, 0xbf5f2ee2
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, v27
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v17
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[13:16], s0 offset:736
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:816
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v13, v19
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v14, v17
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], s0 offset:768
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:752
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:800
+; GFX9-FLATSCR-NEXT: v_add_u32_e32 v0, 0x200, v30
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[11:14], s0 offset:704
+; GFX9-FLATSCR-NEXT: scratch_load_dword v0, v0, off
; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: v_add_f32_e32 v0, v13, v0
+; GFX9-FLATSCR-NEXT: v_add_f32_e32 v0, v31, v0
; GFX9-FLATSCR-NEXT: ; return to shader part epilog
;
; GFX10-FLATSCR-LABEL: vs_main:
@@ -1709,89 +1686,85 @@ define amdgpu_vs float @vs_main(i32 %idx) {
; GFX10-FLATSCR-NEXT: s_addc_u32 s1, s1, 0
; GFX10-FLATSCR-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
; GFX10-FLATSCR-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v6, 0x3f5f2ee2
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v8, 0x3f3d349e
; GFX10-FLATSCR-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3f523be1
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v9, 0xbeae29dc
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v5, 0x3f638e37
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:320
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, v8
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v27, 0x3f5f2ee2
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v29, 0x3f3d349e
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v28, 0x3f523be1
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v17, 0xbeae29dc
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v26, 0x3f638e37
; GFX10-FLATSCR-NEXT: v_and_b32_e32 v0, 0x1fc, v0
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v3, v7
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v4, v6
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v11, 0xbefcd89f
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v12, 0xbefcd8a3
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v10, v9
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v16, 0xbf3d349e
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee3
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v23, 0xbf523be3
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[5:8], off offset:304
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:320
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, v29
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v3, v28
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v4, v27
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v22, 0xbf5f2ee3
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v19, 0xbefcd89f
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v20, 0xbefcd8a3
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v18, v17
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v11, 0xbf3d349e
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v13, 0x3efcd89f
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3efcd89c
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v25, 0xbf523be3
+; GFX10-FLATSCR-NEXT: v_add_nc_u32_e32 v34, 0x200, v0
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v5, v0
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[26:29], off offset:304
; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:288
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[9:12], off offset:272
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v12, 0x3eae29dc
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3e319356
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v18, 0x3efcd89f
-; GFX10-FLATSCR-NEXT: v_add_nc_u32_e32 v39, 0x200, v0
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v35, v0
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[17:20], off offset:272
; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xb702e758
; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, 0xb7043519
; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, 0xbe31934f
; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v3, 0xbe319356
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v17, 0xbf20e7f5
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v13, 0x3eae29d8
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v15, 0x3e31934f
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v19, 0x3efcd89c
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v21, 0xbf638e39
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v22, v20
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v24, 0x3f20e7f5
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v25, v16
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v26, v23
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v4, 0x3f20e7f4
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v27, 0x3703c499
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v28, v14
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v29, v12
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v30, v18
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v6, 0x3eae29dc
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3eae29d8
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v8, 0x3e319356
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v9, 0x3e31934f
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v23, 0xbf638e39
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v24, v22
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[11:14], off offset:224
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3f20e7f5
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v15, v11
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v16, v25
; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:256
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[12:15], off offset:240
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:224
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], off offset:208
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:192
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v0, v17
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, v11
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, v9
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v31, 0xbf523be1
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v34, 0x3f3d349c
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v32, v7
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v33, v16
-; GFX10-FLATSCR-NEXT: scratch_load_dword v10, v35, off
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v35, v21
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v36, v5
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v37, v20
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v38, v6
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[4:7], off offset:832
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[27:30], off offset:816
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:800
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:784
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v33, 0xbf5f2ee2
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v34, v6
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v23, v18
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v26, v8
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v0, v3
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, v27
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, v14
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v3, v12
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v18, v11
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v19, v9
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[35:38], off offset:768
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:752
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:736
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:720
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:704
-; GFX10-FLATSCR-NEXT: scratch_load_dword v0, v39, off
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], off offset:240
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[22:25], off offset:208
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[13:16], off offset:192
+; GFX10-FLATSCR-NEXT: scratch_load_dword v1, v5, off
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v30, 0xbf523be1
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v33, 0x3f3d349c
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v31, v28
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v32, v11
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3703c499
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v16, v29
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v0, 0x3f20e7f4
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v9, v6
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v10, v13
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v18, v12
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v20, v17
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v21, v3
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v24, v26
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v25, v22
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v26, v27
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[30:33], off offset:784
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v32, 0xbf5f2ee2
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v33, v27
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v4, v7
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v5, v8
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[13:16], off offset:736
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v13, v19
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v14, v17
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[7:10], off offset:816
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], off offset:800
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:768
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[30:33], off offset:752
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[3:6], off offset:720
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[11:14], off offset:704
; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX10-FLATSCR-NEXT: v_add_f32_e32 v0, v10, v0
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:832
+; GFX10-FLATSCR-NEXT: scratch_load_dword v0, v34, off
+; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT: v_add_f32_e32 v0, v1, v0
; GFX10-FLATSCR-NEXT: ; return to shader part epilog
;
; GFX9-FLATSCR-PAL-LABEL: vs_main:
@@ -1800,92 +1773,88 @@ define amdgpu_vs float @vs_main(i32 %idx) {
; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s2, s0
; GFX9-FLATSCR-PAL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
; GFX9-FLATSCR-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-FLATSCR-PAL-NEXT: v_and_b32_e32 v23, 0x1fc, v0
+; GFX9-FLATSCR-PAL-NEXT: v_and_b32_e32 v30, 0x1fc, v0
; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xbf20e7f4
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v27, 0x3f5f2ee2
; GFX9-FLATSCR-PAL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-FLATSCR-PAL-NEXT: s_and_b32 s3, s3, 0xffff
; GFX9-FLATSCR-PAL-NEXT: s_add_u32 flat_scratch_lo, s2, s0
; GFX9-FLATSCR-PAL-NEXT: s_addc_u32 flat_scratch_hi, s3, 0
; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s0, 0
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, 0x3f523be1
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, 0x3f3d349e
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0x3eae29dc
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v13, 0x3eae29d8
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, 0x3e319356
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, 0x3e31934f
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v28, 0x3f523be1
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v29, 0x3f3d349e
; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:320
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[12:15], s0 offset:240
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v7
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v6
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v5
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, 0xbf3d349e
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v16, 0xbf20e7f5
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, 0x3efcd89f
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0x3efcd89c
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, 0xbeae29dc
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v29
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v28
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v27
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v11, 0xbf3d349e
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v13, 0x3efcd89f
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, 0x3efcd89c
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v22, 0xbf5f2ee3
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v25, 0xbf523be3
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v26, 0x3f638e37
; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:288
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0xb702e758
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xbeae29dc
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, 0xb7043519
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, 0xbe31934f
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v11, 0xbe319356
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[15:18], s0 offset:224
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, 0xbf523be3
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0xbf5f2ee3
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, 0x3f638e37
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, 0xbefcd89f
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0xbefcd8a3
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v0
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:256
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, 0xbf638e39
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v18
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0x3f20e7f5
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, v15
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v21
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[4:7], s0 offset:304
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:272
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:208
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:192
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v23
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, v18
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v4
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v22, v5
-; GFX9-FLATSCR-PAL-NEXT: scratch_load_dword v13, v1, off
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0x3f20e7f4
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[19:22], s0 offset:768
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0xbf523be1
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, 0x3f3d349c
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, v6
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v15
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], s0 offset:832
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:784
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v7
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee2
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, v5
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, v17
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0x3703c499
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v14
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, v12
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, v17
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:752
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:736
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[3:6], s0 offset:816
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, v16
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, v2
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v0
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v11
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, v3
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, v14
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, v12
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, v2
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, v0
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:800
-; GFX9-FLATSCR-PAL-NEXT: v_add_u32_e32 v1, 0x200, v23
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[4:7], s0 offset:720
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[15:18], s0 offset:704
-; GFX9-FLATSCR-PAL-NEXT: scratch_load_dword v0, v1, off
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, 0xbefcd89f
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, 0xbefcd8a3
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, v17
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xb702e758
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0xb7043519
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, 0xbe31934f
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0xbe319356
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, 0x3eae29dc
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, 0x3eae29d8
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0x3e319356
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, 0x3e31934f
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[11:14], s0 offset:224
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v23, 0xbf638e39
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v24, v22
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, 0x3f20e7f5
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, v11
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v16, v25
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[26:29], s0 offset:304
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[17:20], s0 offset:272
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:256
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[6:9], s0 offset:240
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[22:25], s0 offset:208
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[13:16], s0 offset:192
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, v30
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, 0x3703c499
+; GFX9-FLATSCR-PAL-NEXT: scratch_load_dword v31, v0, off
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v7
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, v8
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0x3f20e7f4
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, v3
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[3:6], s0 offset:720
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:832
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v16, v29
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xbf523be1
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0x3f3d349c
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v28
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v11
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, v6
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v13
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v24, v26
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v25, v22
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v26, v27
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:784
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, v12
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, 0xbf5f2ee2
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v27
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v17
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[13:16], s0 offset:736
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:816
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v13, v19
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, v17
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[23:26], s0 offset:768
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:752
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:800
+; GFX9-FLATSCR-PAL-NEXT: v_add_u32_e32 v0, 0x200, v30
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[11:14], s0 offset:704
+; GFX9-FLATSCR-PAL-NEXT: scratch_load_dword v0, v0, off
; GFX9-FLATSCR-PAL-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-PAL-NEXT: v_add_f32_e32 v0, v13, v0
+; GFX9-FLATSCR-PAL-NEXT: v_add_f32_e32 v0, v31, v0
; GFX9-FLATSCR-PAL-NEXT: ; return to shader part epilog
;
; GFX10-FLATSCR-PAL-LABEL: vs_main:
@@ -1899,165 +1868,158 @@ define amdgpu_vs float @vs_main(i32 %idx) {
; GFX10-FLATSCR-PAL-NEXT: s_addc_u32 s3, s3, 0
; GFX10-FLATSCR-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2
; GFX10-FLATSCR-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, 0x3f5f2ee2
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0x3f3d349e
; GFX10-FLATSCR-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, 0x3f523be1
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, 0xbeae29dc
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, 0x3f638e37
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:320
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v8
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v27, 0x3f5f2ee2
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v29, 0x3f3d349e
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v28, 0x3f523be1
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, 0xbeae29dc
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v26, 0x3f638e37
; GFX10-FLATSCR-PAL-NEXT: v_and_b32_e32 v0, 0x1fc, v0
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v7
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v6
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v11, 0xbefcd89f
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0xbefcd8a3
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v9
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v16, 0xbf3d349e
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee3
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v23, 0xbf523be3
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[5:8], off offset:304
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:320
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v29
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v28
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v27
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v22, 0xbf5f2ee3
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, 0xbefcd89f
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, 0xbefcd8a3
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, v17
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v11, 0xbf3d349e
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v13, 0x3efcd89f
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, 0x3efcd89c
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v25, 0xbf523be3
+; GFX10-FLATSCR-PAL-NEXT: v_add_nc_u32_e32 v34, 0x200, v0
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, v0
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[26:29], off offset:304
; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:288
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[9:12], off offset:272
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0x3eae29dc
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, 0x3e319356
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0x3efcd89f
-; GFX10-FLATSCR-PAL-NEXT: v_add_nc_u32_e32 v39, 0x200, v0
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v35, v0
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[17:20], off offset:272
; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xb702e758
; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0xb7043519
; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, 0xbe31934f
; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0xbe319356
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, 0xbf20e7f5
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v13, 0x3eae29d8
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, 0x3e31934f
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, 0x3efcd89c
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, 0xbf638e39
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v22, v20
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v24, 0x3f20e7f5
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v25, v16
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v26, v23
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, 0x3f20e7f4
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v27, 0x3703c499
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v28, v14
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v29, v12
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v30, v18
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, 0x3eae29dc
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, 0x3eae29d8
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0x3e319356
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, 0x3e31934f
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v23, 0xbf638e39
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v24, v22
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[11:14], off offset:224
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, 0x3f20e7f5
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, v11
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v16, v25
; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:256
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[12:15], off offset:240
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:224
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[20:23], off offset:208
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:192
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, v17
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v11
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v9
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v31, 0xbf523be1
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v34, 0x3f3d349c
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v32, v7
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v33, v16
-; GFX10-FLATSCR-PAL-NEXT: scratch_load_dword v10, v35, off
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v35, v21
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v36, v5
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v37, v20
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v38, v6
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[4:7], off offset:832
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[27:30], off offset:816
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:800
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:784
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v33, 0xbf5f2ee2
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v34, v6
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v23, v18
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v26, v8
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, v3
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v27
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v14
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v12
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, v11
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, v9
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[35:38], off offset:768
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:752
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:736
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:720
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:704
-; GFX10-FLATSCR-PAL-NEXT: scratch_load_dword v0, v39, off
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[6:9], off offset:240
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[22:25], off offset:208
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[13:16], off offset:192
+; GFX10-FLATSCR-PAL-NEXT: scratch_load_dword v1, v5, off
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v30, 0xbf523be1
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v33, 0x3f3d349c
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v31, v28
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v32, v11
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, 0x3703c499
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v16, v29
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0x3f20e7f4
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, v6
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v13
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, v12
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v17
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, v3
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v24, v26
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v25, v22
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v26, v27
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[30:33], off offset:784
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v32, 0xbf5f2ee2
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v33, v27
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v7
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, v8
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[13:16], off offset:736
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v13, v19
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, v17
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[7:10], off offset:816
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], off offset:800
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:768
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[30:33], off offset:752
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[3:6], off offset:720
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[11:14], off offset:704
+; GFX10-FLATSCR-PAL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:832
+; GFX10-FLATSCR-PAL-NEXT: scratch_load_dword v0, v34, off
; GFX10-FLATSCR-PAL-NEXT: s_waitcnt vmcnt(0)
-; GFX10-FLATSCR-PAL-NEXT: v_add_f32_e32 v0, v10, v0
+; GFX10-FLATSCR-PAL-NEXT: v_add_f32_e32 v0, v1, v0
; GFX10-FLATSCR-PAL-NEXT: ; return to shader part epilog
;
; GFX11-FLATSCR-LABEL: vs_main:
; GFX11-FLATSCR: ; %bb.0:
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v1, 0xbf20e7f4 :: v_dual_lshlrev_b32 v0, 2, v0
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v28, 0x3703c499
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v19, 0x3efcd89f
; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v21, 0xbf5f2ee3
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v17, 0xbf3d349e
-; GFX11-FLATSCR-NEXT: v_and_b32_e32 v37, 0x1fc, v0
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xbeae29dc
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v1, 0xbf20e7f4 :: v_dual_lshlrev_b32 v0, 2, v0
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v28, 0x3f3d349e
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3eae29d8
+; GFX11-FLATSCR-NEXT: s_delay_alu instid0(VALU_DEP_4)
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v23, v21
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v27, 0x3f523be1
; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[1:4], off offset:320
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v23, v21 :: v_dual_mov_b32 v8, 0x3f3d349e
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3f523be1
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v24, 0xbf523be3
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v26, v17
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v12, 0xbe319356 :: v_dual_mov_b32 v31, v19
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v2, v8
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v6, 0x3f5f2ee2 :: v_dual_mov_b32 v3, v7
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v5, 0x3f638e37
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v15, 0x3e319356
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v2, v28
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v26, 0x3f5f2ee2
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v19, 0xbefcd89f :: v_dual_mov_b32 v30, v27
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v3, v27
; GFX11-FLATSCR-NEXT: s_delay_alu instid0(VALU_DEP_3)
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v27, v24 :: v_dual_mov_b32 v4, v6
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v25, 0x3f638e37 :: v_dual_mov_b32 v4, v26
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v24, 0xbf523be3
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v17, 0xbeae29dc
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v11, 0xbf3d349e
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v6, 0x3eae29dc
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v8, 0x3e319356
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v9, 0x3e31934f
; GFX11-FLATSCR-NEXT: s_clause 0x1
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[5:8], off offset:304
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[25:28], off offset:304
; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[1:4], off offset:288
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v2, 0xbefcd89f :: v_dual_mov_b32 v1, v0
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v13, 0x3eae29dc :: v_dual_mov_b32 v34, v5
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v3, 0xbefcd8a3
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v13, 0x3efcd89f :: v_dual_mov_b32 v16, v24
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v3, 0xbe319356
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3efcd89c
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v18, v17 :: v_dual_and_b32 v33, 0x1fc, v0
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v20, 0xbefcd8a3 :: v_dual_mov_b32 v15, v11
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v1, 0xb7043519 :: v_dual_mov_b32 v10, v13
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v2, 0xbe31934f :: v_dual_mov_b32 v31, v11
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xb702e758
; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v22, 0xbf638e39
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v25, 0x3f20e7f5
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v9, 0xb702e758
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v11, 0xbe31934f :: v_dual_mov_b32 v36, v6
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v10, 0xb7043519 :: v_dual_mov_b32 v29, v15
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v18, 0xbf20e7f5
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3eae29d8
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v16, 0x3e31934f
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v30, v13
-; GFX11-FLATSCR-NEXT: s_clause 0x1
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[0:3], off offset:272
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[9:12], off offset:256
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v1, 0x3f20e7f4
; GFX11-FLATSCR-NEXT: s_clause 0x1
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[6:9], off offset:240
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[11:14], off offset:224
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v14, 0x3f20e7f5 :: v_dual_mov_b32 v9, v6
+; GFX11-FLATSCR-NEXT: s_clause 0x3
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[17:20], off offset:272
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[0:3], off offset:256
; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[21:24], off offset:208
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[24:27], off offset:192
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v27, v8 :: v_dual_mov_b32 v20, 0x3efcd89c
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v33, v22
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v9, v18
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v10, v2 :: v_dual_mov_b32 v11, v0
-; GFX11-FLATSCR-NEXT: s_clause 0x1
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[13:16], off offset:240
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[17:20], off offset:224
-; GFX11-FLATSCR-NEXT: scratch_load_b32 v14, v37, off
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v32, 0x3f3d349c :: v_dual_mov_b32 v5, v15
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v20, v0
-; GFX11-FLATSCR-NEXT: s_clause 0x2
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[1:4], off offset:832
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[28:31], off offset:816
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[9:12], off offset:800
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v29, 0xbf523be1 :: v_dual_mov_b32 v30, v7
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v31, v17
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v3, v12
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v4, v28
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v24, v19 :: v_dual_mov_b32 v35, v21
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[13:16], off offset:192
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v4, 0x3f20e7f4
+; GFX11-FLATSCR-NEXT: scratch_load_b32 v0, v33, off
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v29, 0xbf523be1
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v32, 0x3f3d349c
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v24, v21
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[4:7], off offset:832
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v7, 0x3703c499 :: v_dual_mov_b32 v16, v28
; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[29:32], off offset:784
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v31, 0xbf5f2ee2 :: v_dual_mov_b32 v32, v6
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v6, v13 :: v_dual_mov_b32 v19, v2
-; GFX11-FLATSCR-NEXT: s_clause 0x4
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[33:36], off offset:768
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v31, 0xbf5f2ee2 :: v_dual_mov_b32 v32, v26
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v23, v25
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v25, v26 :: v_dual_mov_b32 v18, v12
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v20, v17 :: v_dual_mov_b32 v21, v3
+; GFX11-FLATSCR-NEXT: s_clause 0x1
; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[29:32], off offset:752
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[24:27], off offset:736
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[13:16], off offset:736
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v5, v8 :: v_dual_mov_b32 v14, v17
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v4, v7
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v13, v19
+; GFX11-FLATSCR-NEXT: s_clause 0x4
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[7:10], off offset:816
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[18:21], off offset:800
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[22:25], off offset:768
; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[3:6], off offset:720
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[17:20], off offset:704
-; GFX11-FLATSCR-NEXT: scratch_load_b32 v0, v37, off offset:512
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[11:14], off offset:704
+; GFX11-FLATSCR-NEXT: scratch_load_b32 v1, v33, off offset:512
; GFX11-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX11-FLATSCR-NEXT: v_add_f32_e32 v0, v14, v0
+; GFX11-FLATSCR-NEXT: v_add_f32_e32 v0, v0, v1
; GFX11-FLATSCR-NEXT: ; return to shader part epilog
%v1 = extractelement <81 x float> <float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0xBFEA477C60000000, float 0xBFEBE5DC60000000, float 0xBFEC71C720000000, float 0xBFEBE5DC60000000, float 0xBFEA477C60000000, float 0xBFE7A693C0000000, float 0xBFE41CFEA0000000, float 0x3FDF9B13E0000000, float 0x3FDF9B1380000000, float 0x3FD5C53B80000000, float 0x3FD5C53B00000000, float 0x3FC6326AC0000000, float 0x3FC63269E0000000, float 0xBEE05CEB00000000, float 0xBEE086A320000000, float 0xBFC63269E0000000, float 0xBFC6326AC0000000, float 0xBFD5C53B80000000, float 0xBFD5C53B80000000, float 0xBFDF9B13E0000000, float 0xBFDF9B1460000000, float 0xBFE41CFE80000000, float 0x3FE7A693C0000000, float 0x3FEA477C20000000, float 0x3FEBE5DC40000000, float 0x3FEC71C6E0000000, float 0x3FEBE5DC40000000, float 0x3FEA477C20000000, float 0x3FE7A693C0000000, float 0xBFE41CFE80000000>, i32 %idx
%v2 = extractelement <81 x float> <float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0x3FE7A693C0000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFEBE5DC40000000, float 0x3FEBE5DC40000000, float 0xBFEC71C720000000, float 0x3FEC71C6E0000000, float 0xBFEBE5DC60000000, float 0x3FEBE5DC40000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFE7A693C0000000, float 0x3FE7A69380000000, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFE80000000>, i32 %idx
@@ -2640,90 +2602,86 @@ define amdgpu_cs float @cs_main(i32 %idx) {
; GFX9-FLATSCR-LABEL: cs_main:
; GFX9-FLATSCR: ; %bb.0:
; GFX9-FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s0, s2
-; GFX9-FLATSCR-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX9-FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s1, 0
-; GFX9-FLATSCR-NEXT: v_and_b32_e32 v23, 0x1fc, v0
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xbf20e7f4
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, 0xbf20e7f4
; GFX9-FLATSCR-NEXT: s_mov_b32 s0, 0
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, 0x3f523be1
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3f3d349e
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v12, 0x3eae29dc
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v13, 0x3eae29d8
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3e319356
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v15, 0x3e31934f
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:320
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[12:15], s0 offset:240
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v7
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, v6
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, v5
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, 0x3f5f2ee2
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, 0x3f523be1
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, 0x3f3d349e
+; GFX9-FLATSCR-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[4:7], s0 offset:320
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, 0xbeae29dc
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, v3
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, v2
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, v1
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, 0x3eae29dc
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v11, 0x3eae29d8
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v12, 0x3e319356
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v13, 0x3e31934f
; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v15, 0xbf3d349e
; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v16, 0xbf20e7f5
; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v17, 0x3efcd89f
; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, 0x3efcd89c
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:288
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, 0xb702e758
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xbeae29dc
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, 0xb7043519
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, 0xbe31934f
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v11, 0xbe319356
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v26, 0xbf5f2ee3
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v29, 0xbf523be3
+; GFX9-FLATSCR-NEXT: v_and_b32_e32 v31, 0x1fc, v0
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, 0x3f638e37
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[4:7], s0 offset:288
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v23, 0xbefcd89f
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v24, 0xbefcd8a3
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v22, v21
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, 0xb702e758
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, 0xb7043519
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, 0xbe31934f
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, 0xbe319356
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[10:13], s0 offset:240
; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[15:18], s0 offset:224
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, 0xbf523be3
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, 0xbf5f2ee3
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, 0x3f638e37
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, 0xbefcd89f
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, 0xbefcd8a3
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v0
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:256
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v19, 0xbf638e39
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v18
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, 0x3f20e7f5
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, v15
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v21
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[4:7], s0 offset:304
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:272
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:208
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:192
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v23
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, v18
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v4
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v22, v5
-; GFX9-FLATSCR-NEXT: scratch_load_dword v13, v1, off
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, 0x3f20e7f4
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[19:22], s0 offset:768
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, 0xbf523be1
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, 0x3f3d349c
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v19, v6
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v15
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], s0 offset:832
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:784
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v7
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee2
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, v5
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, v17
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, 0x3703c499
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, v14
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, v12
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, v17
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:752
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:736
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[3:6], s0 offset:816
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, v16
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, v2
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v0
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, v11
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, v3
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, v14
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, v12
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v17, v2
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, v0
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:800
-; GFX9-FLATSCR-NEXT: v_add_u32_e32 v1, 0x200, v23
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[4:7], s0 offset:720
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v27, 0xbf638e39
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v28, v26
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, 0x3f20e7f5
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v19, v15
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v29
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v11, 0x3703c499
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:304
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[21:24], s0 offset:272
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[4:7], s0 offset:256
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[26:29], s0 offset:208
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[17:20], s0 offset:192
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, v31
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, v11
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, v12
+; GFX9-FLATSCR-NEXT: scratch_load_dword v32, v4, off
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v25, v7
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:720
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, 0xbf523be1
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3f3d349c
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, v2
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, v15
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v3
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v13, v10
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v14, v17
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v28, v0
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v29, v26
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v30, v1
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, 0x3f20e7f4
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[4:7], s0 offset:784
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v22, v16
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, 0xbf5f2ee2
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, v1
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v24, v21
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[17:20], s0 offset:736
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[11:14], s0 offset:816
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v17, v23
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, v21
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[27:30], s0 offset:768
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:832
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[4:7], s0 offset:752
+; GFX9-FLATSCR-NEXT: v_add_u32_e32 v0, 0x200, v31
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[22:25], s0 offset:800
; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[15:18], s0 offset:704
-; GFX9-FLATSCR-NEXT: scratch_load_dword v0, v1, off
+; GFX9-FLATSCR-NEXT: scratch_load_dword v0, v0, off
; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: v_add_f32_e32 v0, v13, v0
+; GFX9-FLATSCR-NEXT: v_add_f32_e32 v0, v32, v0
; GFX9-FLATSCR-NEXT: ; return to shader part epilog
;
; GFX10-FLATSCR-LABEL: cs_main:
@@ -2732,89 +2690,85 @@ define amdgpu_cs float @cs_main(i32 %idx) {
; GFX10-FLATSCR-NEXT: s_addc_u32 s1, s1, 0
; GFX10-FLATSCR-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
; GFX10-FLATSCR-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v6, 0x3f5f2ee2
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v8, 0x3f3d349e
; GFX10-FLATSCR-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3f523be1
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v9, 0xbeae29dc
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v5, 0x3f638e37
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:320
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, v8
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v27, 0x3f5f2ee2
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v29, 0x3f3d349e
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v28, 0x3f523be1
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v17, 0xbeae29dc
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v26, 0x3f638e37
; GFX10-FLATSCR-NEXT: v_and_b32_e32 v0, 0x1fc, v0
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v3, v7
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v4, v6
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v11, 0xbefcd89f
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v12, 0xbefcd8a3
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v10, v9
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v16, 0xbf3d349e
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee3
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v23, 0xbf523be3
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[5:8], off offset:304
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:320
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, v29
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v3, v28
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v4, v27
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v22, 0xbf5f2ee3
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v19, 0xbefcd89f
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v20, 0xbefcd8a3
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v18, v17
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v11, 0xbf3d349e
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v13, 0x3efcd89f
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3efcd89c
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v25, 0xbf523be3
+; GFX10-FLATSCR-NEXT: v_add_nc_u32_e32 v34, 0x200, v0
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v5, v0
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[26:29], off offset:304
; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:288
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[9:12], off offset:272
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v12, 0x3eae29dc
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3e319356
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v18, 0x3efcd89f
-; GFX10-FLATSCR-NEXT: v_add_nc_u32_e32 v39, 0x200, v0
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v35, v0
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[17:20], off offset:272
; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xb702e758
; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, 0xb7043519
; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, 0xbe31934f
; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v3, 0xbe319356
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v17, 0xbf20e7f5
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v13, 0x3eae29d8
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v15, 0x3e31934f
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v19, 0x3efcd89c
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v21, 0xbf638e39
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v22, v20
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v24, 0x3f20e7f5
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v25, v16
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v26, v23
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v4, 0x3f20e7f4
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v27, 0x3703c499
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v28, v14
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v29, v12
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v30, v18
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v6, 0x3eae29dc
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3eae29d8
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v8, 0x3e319356
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v9, 0x3e31934f
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v23, 0xbf638e39
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v24, v22
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[11:14], off offset:224
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3f20e7f5
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v15, v11
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v16, v25
; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:256
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[12:15], off offset:240
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:224
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], off offset:208
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:192
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v0, v17
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, v11
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, v9
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v31, 0xbf523be1
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v34, 0x3f3d349c
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v32, v7
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v33, v16
-; GFX10-FLATSCR-NEXT: scratch_load_dword v10, v35, off
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v35, v21
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v36, v5
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v37, v20
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v38, v6
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[4:7], off offset:832
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[27:30], off offset:816
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:800
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:784
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v33, 0xbf5f2ee2
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v34, v6
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v23, v18
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v26, v8
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v0, v3
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, v27
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, v14
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v3, v12
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v18, v11
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v19, v9
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[35:38], off offset:768
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:752
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:736
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:720
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:704
-; GFX10-FLATSCR-NEXT: scratch_load_dword v0, v39, off
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], off offset:240
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[22:25], off offset:208
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[13:16], off offset:192
+; GFX10-FLATSCR-NEXT: scratch_load_dword v1, v5, off
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v30, 0xbf523be1
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v33, 0x3f3d349c
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v31, v28
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v32, v11
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3703c499
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v16, v29
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v0, 0x3f20e7f4
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v9, v6
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v10, v13
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v18, v12
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v20, v17
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v21, v3
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v24, v26
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v25, v22
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v26, v27
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[30:33], off offset:784
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v32, 0xbf5f2ee2
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v33, v27
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v4, v7
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v5, v8
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[13:16], off offset:736
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v13, v19
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v14, v17
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[7:10], off offset:816
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], off offset:800
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:768
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[30:33], off offset:752
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[3:6], off offset:720
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[11:14], off offset:704
+; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:832
+; GFX10-FLATSCR-NEXT: scratch_load_dword v0, v34, off
; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX10-FLATSCR-NEXT: v_add_f32_e32 v0, v10, v0
+; GFX10-FLATSCR-NEXT: v_add_f32_e32 v0, v1, v0
; GFX10-FLATSCR-NEXT: ; return to shader part epilog
;
; GFX9-FLATSCR-PAL-LABEL: cs_main:
@@ -2822,93 +2776,89 @@ define amdgpu_cs float @cs_main(i32 %idx) {
; GFX9-FLATSCR-PAL-NEXT: s_getpc_b64 s[2:3]
; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s2, s0
; GFX9-FLATSCR-PAL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x10
-; GFX9-FLATSCR-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-FLATSCR-PAL-NEXT: v_and_b32_e32 v23, 0x1fc, v0
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xbf20e7f4
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, 0xbf20e7f4
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0x3f5f2ee2
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, 0x3f523be1
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0x3f3d349e
; GFX9-FLATSCR-PAL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-FLATSCR-PAL-NEXT: s_and_b32 s3, s3, 0xffff
; GFX9-FLATSCR-PAL-NEXT: s_add_u32 flat_scratch_lo, s2, s0
; GFX9-FLATSCR-PAL-NEXT: s_addc_u32 flat_scratch_hi, s3, 0
; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s0, 0
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, 0x3f523be1
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, 0x3f3d349e
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0x3eae29dc
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v13, 0x3eae29d8
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, 0x3e319356
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, 0x3e31934f
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:320
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[12:15], s0 offset:240
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v7
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v6
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v5
+; GFX9-FLATSCR-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[4:7], s0 offset:320
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, 0xbeae29dc
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, v3
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, v2
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, v1
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, 0x3eae29dc
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v11, 0x3eae29d8
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0x3e319356
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v13, 0x3e31934f
; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, 0xbf3d349e
; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v16, 0xbf20e7f5
; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, 0x3efcd89f
; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0x3efcd89c
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:288
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0xb702e758
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xbeae29dc
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, 0xb7043519
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, 0xbe31934f
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v11, 0xbe319356
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v26, 0xbf5f2ee3
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v29, 0xbf523be3
+; GFX9-FLATSCR-PAL-NEXT: v_and_b32_e32 v31, 0x1fc, v0
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0x3f638e37
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[4:7], s0 offset:288
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v23, 0xbefcd89f
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v24, 0xbefcd8a3
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v22, v21
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, 0xb702e758
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, 0xb7043519
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, 0xbe31934f
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, 0xbe319356
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[10:13], s0 offset:240
; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[15:18], s0 offset:224
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, 0xbf523be3
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0xbf5f2ee3
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, 0x3f638e37
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, 0xbefcd89f
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0xbefcd8a3
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v0
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:256
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, 0xbf638e39
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v18
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0x3f20e7f5
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, v15
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v21
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[4:7], s0 offset:304
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:272
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:208
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:192
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v23
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, v18
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v4
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v22, v5
-; GFX9-FLATSCR-PAL-NEXT: scratch_load_dword v13, v1, off
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0x3f20e7f4
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[19:22], s0 offset:768
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0xbf523be1
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, 0x3f3d349c
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, v6
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v15
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], s0 offset:832
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:784
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v7
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee2
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, v5
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, v17
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0x3703c499
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v14
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, v12
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, v17
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:752
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:736
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[3:6], s0 offset:816
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, v16
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, v2
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v0
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v11
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, v3
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, v14
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, v12
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, v2
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, v0
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:800
-; GFX9-FLATSCR-PAL-NEXT: v_add_u32_e32 v1, 0x200, v23
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[4:7], s0 offset:720
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v27, 0xbf638e39
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v28, v26
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0x3f20e7f5
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, v15
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v29
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v11, 0x3703c499
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:304
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[21:24], s0 offset:272
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[4:7], s0 offset:256
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[26:29], s0 offset:208
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[17:20], s0 offset:192
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v31
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, v11
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, v12
+; GFX9-FLATSCR-PAL-NEXT: scratch_load_dword v32, v4, off
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v25, v7
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:720
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, 0xbf523be1
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, 0x3f3d349c
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, v2
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, v15
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v3
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v13, v10
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, v17
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v28, v0
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v29, v26
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v30, v1
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0x3f20e7f4
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[4:7], s0 offset:784
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v22, v16
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, 0xbf5f2ee2
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, v1
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v24, v21
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[17:20], s0 offset:736
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[11:14], s0 offset:816
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, v23
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, v21
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[27:30], s0 offset:768
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:832
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[4:7], s0 offset:752
+; GFX9-FLATSCR-PAL-NEXT: v_add_u32_e32 v0, 0x200, v31
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[22:25], s0 offset:800
; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[15:18], s0 offset:704
-; GFX9-FLATSCR-PAL-NEXT: scratch_load_dword v0, v1, off
+; GFX9-FLATSCR-PAL-NEXT: scratch_load_dword v0, v0, off
; GFX9-FLATSCR-PAL-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-PAL-NEXT: v_add_f32_e32 v0, v13, v0
+; GFX9-FLATSCR-PAL-NEXT: v_add_f32_e32 v0, v32, v0
; GFX9-FLATSCR-PAL-NEXT: ; return to shader part epilog
;
; GFX10-FLATSCR-PAL-LABEL: cs_main:
@@ -2922,165 +2872,158 @@ define amdgpu_cs float @cs_main(i32 %idx) {
; GFX10-FLATSCR-PAL-NEXT: s_addc_u32 s3, s3, 0
; GFX10-FLATSCR-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2
; GFX10-FLATSCR-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, 0x3f5f2ee2
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0x3f3d349e
; GFX10-FLATSCR-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, 0x3f523be1
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, 0xbeae29dc
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, 0x3f638e37
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:320
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v8
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v27, 0x3f5f2ee2
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v29, 0x3f3d349e
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v28, 0x3f523be1
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, 0xbeae29dc
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v26, 0x3f638e37
; GFX10-FLATSCR-PAL-NEXT: v_and_b32_e32 v0, 0x1fc, v0
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v7
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v6
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v11, 0xbefcd89f
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0xbefcd8a3
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v9
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v16, 0xbf3d349e
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee3
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v23, 0xbf523be3
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[5:8], off offset:304
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:320
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v29
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v28
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v27
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v22, 0xbf5f2ee3
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, 0xbefcd89f
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, 0xbefcd8a3
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, v17
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v11, 0xbf3d349e
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v13, 0x3efcd89f
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, 0x3efcd89c
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v25, 0xbf523be3
+; GFX10-FLATSCR-PAL-NEXT: v_add_nc_u32_e32 v34, 0x200, v0
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, v0
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[26:29], off offset:304
; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:288
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[9:12], off offset:272
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0x3eae29dc
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, 0x3e319356
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0x3efcd89f
-; GFX10-FLATSCR-PAL-NEXT: v_add_nc_u32_e32 v39, 0x200, v0
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v35, v0
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[17:20], off offset:272
; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xb702e758
; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0xb7043519
; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, 0xbe31934f
; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0xbe319356
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, 0xbf20e7f5
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v13, 0x3eae29d8
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, 0x3e31934f
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, 0x3efcd89c
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, 0xbf638e39
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v22, v20
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v24, 0x3f20e7f5
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v25, v16
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v26, v23
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, 0x3f20e7f4
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v27, 0x3703c499
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v28, v14
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v29, v12
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v30, v18
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, 0x3eae29dc
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, 0x3eae29d8
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0x3e319356
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, 0x3e31934f
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v23, 0xbf638e39
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v24, v22
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[11:14], off offset:224
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, 0x3f20e7f5
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, v11
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v16, v25
; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:256
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[12:15], off offset:240
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:224
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[20:23], off offset:208
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:192
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, v17
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v11
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v9
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v31, 0xbf523be1
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v34, 0x3f3d349c
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v32, v7
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v33, v16
-; GFX10-FLATSCR-PAL-NEXT: scratch_load_dword v10, v35, off
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v35, v21
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v36, v5
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v37, v20
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v38, v6
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[4:7], off offset:832
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[27:30], off offset:816
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:800
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:784
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v33, 0xbf5f2ee2
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v34, v6
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v23, v18
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v26, v8
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, v3
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v27
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v14
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v12
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, v11
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, v9
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[35:38], off offset:768
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:752
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:736
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:720
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:704
-; GFX10-FLATSCR-PAL-NEXT: scratch_load_dword v0, v39, off
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[6:9], off offset:240
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[22:25], off offset:208
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[13:16], off offset:192
+; GFX10-FLATSCR-PAL-NEXT: scratch_load_dword v1, v5, off
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v30, 0xbf523be1
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v33, 0x3f3d349c
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v31, v28
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v32, v11
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, 0x3703c499
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v16, v29
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0x3f20e7f4
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, v6
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v13
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, v12
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v17
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, v3
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v24, v26
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v25, v22
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v26, v27
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[30:33], off offset:784
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v32, 0xbf5f2ee2
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v33, v27
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v7
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, v8
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[13:16], off offset:736
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v13, v19
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, v17
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[7:10], off offset:816
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], off offset:800
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:768
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[30:33], off offset:752
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[3:6], off offset:720
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[11:14], off offset:704
+; GFX10-FLATSCR-PAL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:832
+; GFX10-FLATSCR-PAL-NEXT: scratch_load_dword v0, v34, off
; GFX10-FLATSCR-PAL-NEXT: s_waitcnt vmcnt(0)
-; GFX10-FLATSCR-PAL-NEXT: v_add_f32_e32 v0, v10, v0
+; GFX10-FLATSCR-PAL-NEXT: v_add_f32_e32 v0, v1, v0
; GFX10-FLATSCR-PAL-NEXT: ; return to shader part epilog
;
; GFX11-FLATSCR-LABEL: cs_main:
; GFX11-FLATSCR: ; %bb.0:
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v1, 0xbf20e7f4 :: v_dual_lshlrev_b32 v0, 2, v0
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v28, 0x3703c499
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v19, 0x3efcd89f
; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v21, 0xbf5f2ee3
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v17, 0xbf3d349e
-; GFX11-FLATSCR-NEXT: v_and_b32_e32 v37, 0x1fc, v0
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xbeae29dc
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v1, 0xbf20e7f4 :: v_dual_lshlrev_b32 v0, 2, v0
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v28, 0x3f3d349e
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3eae29d8
+; GFX11-FLATSCR-NEXT: s_delay_alu instid0(VALU_DEP_4)
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v23, v21
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v27, 0x3f523be1
; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[1:4], off offset:320
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v23, v21 :: v_dual_mov_b32 v8, 0x3f3d349e
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3f523be1
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v24, 0xbf523be3
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v26, v17
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v12, 0xbe319356 :: v_dual_mov_b32 v31, v19
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v2, v8
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v6, 0x3f5f2ee2 :: v_dual_mov_b32 v3, v7
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v5, 0x3f638e37
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v15, 0x3e319356
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v2, v28
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v26, 0x3f5f2ee2
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v19, 0xbefcd89f :: v_dual_mov_b32 v30, v27
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v3, v27
; GFX11-FLATSCR-NEXT: s_delay_alu instid0(VALU_DEP_3)
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v27, v24 :: v_dual_mov_b32 v4, v6
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v25, 0x3f638e37 :: v_dual_mov_b32 v4, v26
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v24, 0xbf523be3
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v17, 0xbeae29dc
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v11, 0xbf3d349e
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v6, 0x3eae29dc
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v8, 0x3e319356
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v9, 0x3e31934f
; GFX11-FLATSCR-NEXT: s_clause 0x1
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[5:8], off offset:304
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[25:28], off offset:304
; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[1:4], off offset:288
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v2, 0xbefcd89f :: v_dual_mov_b32 v1, v0
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v13, 0x3eae29dc :: v_dual_mov_b32 v34, v5
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v3, 0xbefcd8a3
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v13, 0x3efcd89f :: v_dual_mov_b32 v16, v24
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v3, 0xbe319356
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3efcd89c
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v18, v17 :: v_dual_and_b32 v33, 0x1fc, v0
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v20, 0xbefcd8a3 :: v_dual_mov_b32 v15, v11
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v1, 0xb7043519 :: v_dual_mov_b32 v10, v13
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v2, 0xbe31934f :: v_dual_mov_b32 v31, v11
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xb702e758
; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v22, 0xbf638e39
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v25, 0x3f20e7f5
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v9, 0xb702e758
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v11, 0xbe31934f :: v_dual_mov_b32 v36, v6
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v10, 0xb7043519 :: v_dual_mov_b32 v29, v15
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v18, 0xbf20e7f5
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3eae29d8
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v16, 0x3e31934f
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v30, v13
-; GFX11-FLATSCR-NEXT: s_clause 0x1
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[0:3], off offset:272
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[9:12], off offset:256
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v1, 0x3f20e7f4
; GFX11-FLATSCR-NEXT: s_clause 0x1
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[6:9], off offset:240
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[11:14], off offset:224
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v14, 0x3f20e7f5 :: v_dual_mov_b32 v9, v6
+; GFX11-FLATSCR-NEXT: s_clause 0x3
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[17:20], off offset:272
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[0:3], off offset:256
; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[21:24], off offset:208
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[24:27], off offset:192
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v27, v8 :: v_dual_mov_b32 v20, 0x3efcd89c
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v33, v22
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v9, v18
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v10, v2 :: v_dual_mov_b32 v11, v0
-; GFX11-FLATSCR-NEXT: s_clause 0x1
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[13:16], off offset:240
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[17:20], off offset:224
-; GFX11-FLATSCR-NEXT: scratch_load_b32 v14, v37, off
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v32, 0x3f3d349c :: v_dual_mov_b32 v5, v15
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v20, v0
-; GFX11-FLATSCR-NEXT: s_clause 0x2
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[1:4], off offset:832
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[28:31], off offset:816
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[9:12], off offset:800
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v29, 0xbf523be1 :: v_dual_mov_b32 v30, v7
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v31, v17
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v3, v12
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v4, v28
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v24, v19 :: v_dual_mov_b32 v35, v21
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[13:16], off offset:192
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v4, 0x3f20e7f4
+; GFX11-FLATSCR-NEXT: scratch_load_b32 v0, v33, off
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v29, 0xbf523be1
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v32, 0x3f3d349c
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v24, v21
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[4:7], off offset:832
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v7, 0x3703c499 :: v_dual_mov_b32 v16, v28
; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[29:32], off offset:784
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v31, 0xbf5f2ee2 :: v_dual_mov_b32 v32, v6
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v6, v13 :: v_dual_mov_b32 v19, v2
-; GFX11-FLATSCR-NEXT: s_clause 0x4
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[33:36], off offset:768
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v31, 0xbf5f2ee2 :: v_dual_mov_b32 v32, v26
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v23, v25
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v25, v26 :: v_dual_mov_b32 v18, v12
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v20, v17 :: v_dual_mov_b32 v21, v3
+; GFX11-FLATSCR-NEXT: s_clause 0x1
; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[29:32], off offset:752
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[24:27], off offset:736
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[13:16], off offset:736
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v5, v8 :: v_dual_mov_b32 v14, v17
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v4, v7
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v13, v19
+; GFX11-FLATSCR-NEXT: s_clause 0x4
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[7:10], off offset:816
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[18:21], off offset:800
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[22:25], off offset:768
; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[3:6], off offset:720
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[17:20], off offset:704
-; GFX11-FLATSCR-NEXT: scratch_load_b32 v0, v37, off offset:512
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[11:14], off offset:704
+; GFX11-FLATSCR-NEXT: scratch_load_b32 v1, v33, off offset:512
; GFX11-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX11-FLATSCR-NEXT: v_add_f32_e32 v0, v14, v0
+; GFX11-FLATSCR-NEXT: v_add_f32_e32 v0, v0, v1
; GFX11-FLATSCR-NEXT: ; return to shader part epilog
%v1 = extractelement <81 x float> <float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0xBFEA477C60000000, float 0xBFEBE5DC60000000, float 0xBFEC71C720000000, float 0xBFEBE5DC60000000, float 0xBFEA477C60000000, float 0xBFE7A693C0000000, float 0xBFE41CFEA0000000, float 0x3FDF9B13E0000000, float 0x3FDF9B1380000000, float 0x3FD5C53B80000000, float 0x3FD5C53B00000000, float 0x3FC6326AC0000000, float 0x3FC63269E0000000, float 0xBEE05CEB00000000, float 0xBEE086A320000000, float 0xBFC63269E0000000, float 0xBFC6326AC0000000, float 0xBFD5C53B80000000, float 0xBFD5C53B80000000, float 0xBFDF9B13E0000000, float 0xBFDF9B1460000000, float 0xBFE41CFE80000000, float 0x3FE7A693C0000000, float 0x3FEA477C20000000, float 0x3FEBE5DC40000000, float 0x3FEC71C6E0000000, float 0x3FEBE5DC40000000, float 0x3FEA477C20000000, float 0x3FE7A693C0000000, float 0xBFE41CFE80000000>, i32 %idx
%v2 = extractelement <81 x float> <float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0x3FE7A693C0000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFEBE5DC40000000, float 0x3FEBE5DC40000000, float 0xBFEC71C720000000, float 0x3FEC71C6E0000000, float 0xBFEBE5DC60000000, float 0x3FEBE5DC40000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFE7A693C0000000, float 0x3FE7A69380000000, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFE80000000>, i32 %idx
@@ -3662,88 +3605,84 @@ define amdgpu_hs float @hs_main(i32 %idx) {
; GFX9-FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s0, s5
; GFX9-FLATSCR-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX9-FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s1, 0
-; GFX9-FLATSCR-NEXT: v_and_b32_e32 v23, 0x1fc, v0
+; GFX9-FLATSCR-NEXT: v_and_b32_e32 v30, 0x1fc, v0
; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xbf20e7f4
; GFX9-FLATSCR-NEXT: s_mov_b32 s0, 0
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, 0x3f523be1
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3f3d349e
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v12, 0x3eae29dc
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v13, 0x3eae29d8
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3e319356
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v15, 0x3e31934f
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v27, 0x3f5f2ee2
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v28, 0x3f523be1
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v29, 0x3f3d349e
; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:320
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[12:15], s0 offset:240
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v7
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, v6
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, v5
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v15, 0xbf3d349e
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v16, 0xbf20e7f5
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v17, 0x3efcd89f
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, 0x3efcd89c
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v17, 0xbeae29dc
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v29
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, v28
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, v27
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v11, 0xbf3d349e
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v13, 0x3efcd89f
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3efcd89c
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v22, 0xbf5f2ee3
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v25, 0xbf523be3
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v26, 0x3f638e37
; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:288
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, 0xb702e758
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xbeae29dc
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, 0xb7043519
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, 0xbe31934f
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v11, 0xbe319356
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[15:18], s0 offset:224
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, 0xbf523be3
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, 0xbf5f2ee3
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, 0x3f638e37
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, 0xbefcd89f
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, 0xbefcd8a3
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v0
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:256
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v19, 0xbf638e39
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v18
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, 0x3f20e7f5
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, v15
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v21
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[4:7], s0 offset:304
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:272
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:208
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:192
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v23
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, v18
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v4
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v22, v5
-; GFX9-FLATSCR-NEXT: scratch_load_dword v13, v1, off
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, 0x3f20e7f4
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[19:22], s0 offset:768
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, 0xbf523be1
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, 0x3f3d349c
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v19, v6
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v15
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], s0 offset:832
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:784
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v7
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee2
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, v5
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, v17
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, 0x3703c499
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, v14
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, v12
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, v17
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:752
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:736
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[3:6], s0 offset:816
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, v16
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, v2
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v0
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, v11
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, v3
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, v14
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, v12
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v17, v2
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, v0
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:800
-; GFX9-FLATSCR-NEXT: v_add_u32_e32 v1, 0x200, v23
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[4:7], s0 offset:720
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[15:18], s0 offset:704
-; GFX9-FLATSCR-NEXT: scratch_load_dword v0, v1, off
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v19, 0xbefcd89f
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, 0xbefcd8a3
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, v17
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xb702e758
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, 0xb7043519
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, 0xbe31934f
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, 0xbe319356
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, 0x3eae29dc
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3eae29d8
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, 0x3e319356
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, 0x3e31934f
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[11:14], s0 offset:224
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v23, 0xbf638e39
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v24, v22
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3f20e7f5
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v15, v11
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v16, v25
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[26:29], s0 offset:304
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[17:20], s0 offset:272
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:256
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s0 offset:240
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[22:25], s0 offset:208
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[13:16], s0 offset:192
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, v30
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3703c499
+; GFX9-FLATSCR-NEXT: scratch_load_dword v31, v0, off
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, v7
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, v8
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, 0x3f20e7f4
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, v3
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[3:6], s0 offset:720
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:832
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v16, v29
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xbf523be1
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, 0x3f3d349c
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v28
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, v11
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, v6
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v13
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v24, v26
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v25, v22
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v26, v27
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:784
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, v12
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, 0xbf5f2ee2
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, v27
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v17
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[13:16], s0 offset:736
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:816
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v13, v19
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v14, v17
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], s0 offset:768
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:752
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:800
+; GFX9-FLATSCR-NEXT: v_add_u32_e32 v0, 0x200, v30
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[11:14], s0 offset:704
+; GFX9-FLATSCR-NEXT: scratch_load_dword v0, v0, off
; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: v_add_f32_e32 v0, v13, v0
+; GFX9-FLATSCR-NEXT: v_add_f32_e32 v0, v31, v0
; GFX9-FLATSCR-NEXT: ; return to shader part epilog
;
; GFX10-FLATSCR-LABEL: hs_main:
@@ -3752,89 +3691,85 @@ define amdgpu_hs float @hs_main(i32 %idx) {
; GFX10-FLATSCR-NEXT: s_addc_u32 s1, s1, 0
; GFX10-FLATSCR-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
; GFX10-FLATSCR-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v6, 0x3f5f2ee2
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v8, 0x3f3d349e
; GFX10-FLATSCR-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3f523be1
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v9, 0xbeae29dc
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v5, 0x3f638e37
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:320
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, v8
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v27, 0x3f5f2ee2
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v29, 0x3f3d349e
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v28, 0x3f523be1
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v17, 0xbeae29dc
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v26, 0x3f638e37
; GFX10-FLATSCR-NEXT: v_and_b32_e32 v0, 0x1fc, v0
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v3, v7
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v4, v6
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v11, 0xbefcd89f
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v12, 0xbefcd8a3
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v10, v9
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v16, 0xbf3d349e
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee3
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v23, 0xbf523be3
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[5:8], off offset:304
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:320
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, v29
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v3, v28
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v4, v27
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v22, 0xbf5f2ee3
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v19, 0xbefcd89f
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v20, 0xbefcd8a3
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v18, v17
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v11, 0xbf3d349e
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v13, 0x3efcd89f
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3efcd89c
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v25, 0xbf523be3
+; GFX10-FLATSCR-NEXT: v_add_nc_u32_e32 v34, 0x200, v0
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v5, v0
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[26:29], off offset:304
; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:288
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[9:12], off offset:272
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v12, 0x3eae29dc
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3e319356
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v18, 0x3efcd89f
-; GFX10-FLATSCR-NEXT: v_add_nc_u32_e32 v39, 0x200, v0
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v35, v0
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[17:20], off offset:272
; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xb702e758
; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, 0xb7043519
; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, 0xbe31934f
; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v3, 0xbe319356
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v17, 0xbf20e7f5
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v13, 0x3eae29d8
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v15, 0x3e31934f
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v19, 0x3efcd89c
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v21, 0xbf638e39
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v22, v20
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v24, 0x3f20e7f5
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v25, v16
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v26, v23
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v4, 0x3f20e7f4
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v27, 0x3703c499
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v28, v14
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v29, v12
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v30, v18
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v6, 0x3eae29dc
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3eae29d8
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v8, 0x3e319356
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v9, 0x3e31934f
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v23, 0xbf638e39
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v24, v22
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[11:14], off offset:224
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3f20e7f5
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v15, v11
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v16, v25
; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:256
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[12:15], off offset:240
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:224
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], off offset:208
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:192
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v0, v17
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, v11
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, v9
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v31, 0xbf523be1
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v34, 0x3f3d349c
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v32, v7
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v33, v16
-; GFX10-FLATSCR-NEXT: scratch_load_dword v10, v35, off
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v35, v21
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v36, v5
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v37, v20
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v38, v6
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[4:7], off offset:832
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[27:30], off offset:816
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:800
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:784
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v33, 0xbf5f2ee2
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v34, v6
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v23, v18
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v26, v8
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v0, v3
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, v27
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, v14
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v3, v12
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v18, v11
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v19, v9
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[35:38], off offset:768
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:752
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:736
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:720
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:704
-; GFX10-FLATSCR-NEXT: scratch_load_dword v0, v39, off
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], off offset:240
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[22:25], off offset:208
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[13:16], off offset:192
+; GFX10-FLATSCR-NEXT: scratch_load_dword v1, v5, off
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v30, 0xbf523be1
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v33, 0x3f3d349c
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v31, v28
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v32, v11
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3703c499
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v16, v29
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v0, 0x3f20e7f4
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v9, v6
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v10, v13
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v18, v12
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v20, v17
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v21, v3
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v24, v26
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v25, v22
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v26, v27
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[30:33], off offset:784
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v32, 0xbf5f2ee2
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v33, v27
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v4, v7
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v5, v8
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[13:16], off offset:736
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v13, v19
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v14, v17
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[7:10], off offset:816
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], off offset:800
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:768
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[30:33], off offset:752
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[3:6], off offset:720
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[11:14], off offset:704
+; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:832
+; GFX10-FLATSCR-NEXT: scratch_load_dword v0, v34, off
; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX10-FLATSCR-NEXT: v_add_f32_e32 v0, v10, v0
+; GFX10-FLATSCR-NEXT: v_add_f32_e32 v0, v1, v0
; GFX10-FLATSCR-NEXT: ; return to shader part epilog
;
; GFX9-FLATSCR-PAL-LABEL: hs_main:
@@ -3843,92 +3778,88 @@ define amdgpu_hs float @hs_main(i32 %idx) {
; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s0, s8
; GFX9-FLATSCR-PAL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
; GFX9-FLATSCR-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-FLATSCR-PAL-NEXT: v_and_b32_e32 v23, 0x1fc, v0
+; GFX9-FLATSCR-PAL-NEXT: v_and_b32_e32 v30, 0x1fc, v0
; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xbf20e7f4
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v27, 0x3f5f2ee2
; GFX9-FLATSCR-PAL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-FLATSCR-PAL-NEXT: s_and_b32 s1, s1, 0xffff
; GFX9-FLATSCR-PAL-NEXT: s_add_u32 flat_scratch_lo, s0, s5
; GFX9-FLATSCR-PAL-NEXT: s_addc_u32 flat_scratch_hi, s1, 0
; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s0, 0
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, 0x3f523be1
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, 0x3f3d349e
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0x3eae29dc
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v13, 0x3eae29d8
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, 0x3e319356
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, 0x3e31934f
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v28, 0x3f523be1
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v29, 0x3f3d349e
; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:320
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[12:15], s0 offset:240
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v7
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v6
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v5
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, 0xbf3d349e
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v16, 0xbf20e7f5
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, 0x3efcd89f
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0x3efcd89c
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, 0xbeae29dc
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v29
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v28
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v27
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v11, 0xbf3d349e
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v13, 0x3efcd89f
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, 0x3efcd89c
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v22, 0xbf5f2ee3
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v25, 0xbf523be3
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v26, 0x3f638e37
; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:288
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0xb702e758
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xbeae29dc
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, 0xb7043519
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, 0xbe31934f
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v11, 0xbe319356
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[15:18], s0 offset:224
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, 0xbf523be3
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0xbf5f2ee3
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, 0x3f638e37
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, 0xbefcd89f
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0xbefcd8a3
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v0
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:256
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, 0xbf638e39
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v18
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0x3f20e7f5
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, v15
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v21
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[4:7], s0 offset:304
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:272
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:208
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:192
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v23
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, v18
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v4
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v22, v5
-; GFX9-FLATSCR-PAL-NEXT: scratch_load_dword v13, v1, off
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0x3f20e7f4
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[19:22], s0 offset:768
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0xbf523be1
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, 0x3f3d349c
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, v6
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v15
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], s0 offset:832
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:784
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v7
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee2
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, v5
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, v17
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0x3703c499
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v14
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, v12
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, v17
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:752
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:736
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[3:6], s0 offset:816
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, v16
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, v2
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v0
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v11
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, v3
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, v14
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, v12
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, v2
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, v0
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:800
-; GFX9-FLATSCR-PAL-NEXT: v_add_u32_e32 v1, 0x200, v23
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[4:7], s0 offset:720
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[15:18], s0 offset:704
-; GFX9-FLATSCR-PAL-NEXT: scratch_load_dword v0, v1, off
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, 0xbefcd89f
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, 0xbefcd8a3
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, v17
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xb702e758
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0xb7043519
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, 0xbe31934f
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0xbe319356
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, 0x3eae29dc
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, 0x3eae29d8
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0x3e319356
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, 0x3e31934f
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[11:14], s0 offset:224
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v23, 0xbf638e39
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v24, v22
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, 0x3f20e7f5
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, v11
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v16, v25
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[26:29], s0 offset:304
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[17:20], s0 offset:272
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:256
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[6:9], s0 offset:240
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[22:25], s0 offset:208
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[13:16], s0 offset:192
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, v30
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, 0x3703c499
+; GFX9-FLATSCR-PAL-NEXT: scratch_load_dword v31, v0, off
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v7
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, v8
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0x3f20e7f4
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, v3
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[3:6], s0 offset:720
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:832
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v16, v29
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xbf523be1
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0x3f3d349c
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v28
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v11
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, v6
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v13
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v24, v26
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v25, v22
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v26, v27
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:784
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, v12
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, 0xbf5f2ee2
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v27
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v17
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[13:16], s0 offset:736
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:816
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v13, v19
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, v17
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[23:26], s0 offset:768
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:752
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:800
+; GFX9-FLATSCR-PAL-NEXT: v_add_u32_e32 v0, 0x200, v30
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[11:14], s0 offset:704
+; GFX9-FLATSCR-PAL-NEXT: scratch_load_dword v0, v0, off
; GFX9-FLATSCR-PAL-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-PAL-NEXT: v_add_f32_e32 v0, v13, v0
+; GFX9-FLATSCR-PAL-NEXT: v_add_f32_e32 v0, v31, v0
; GFX9-FLATSCR-PAL-NEXT: ; return to shader part epilog
;
; GFX10-FLATSCR-PAL-LABEL: hs_main:
@@ -3942,165 +3873,158 @@ define amdgpu_hs float @hs_main(i32 %idx) {
; GFX10-FLATSCR-PAL-NEXT: s_addc_u32 s1, s1, 0
; GFX10-FLATSCR-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
; GFX10-FLATSCR-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, 0x3f5f2ee2
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0x3f3d349e
; GFX10-FLATSCR-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, 0x3f523be1
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, 0xbeae29dc
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, 0x3f638e37
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:320
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v8
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v27, 0x3f5f2ee2
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v29, 0x3f3d349e
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v28, 0x3f523be1
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, 0xbeae29dc
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v26, 0x3f638e37
; GFX10-FLATSCR-PAL-NEXT: v_and_b32_e32 v0, 0x1fc, v0
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v7
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v6
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v11, 0xbefcd89f
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0xbefcd8a3
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v9
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v16, 0xbf3d349e
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee3
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v23, 0xbf523be3
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[5:8], off offset:304
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:320
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v29
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v28
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v27
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v22, 0xbf5f2ee3
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, 0xbefcd89f
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, 0xbefcd8a3
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, v17
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v11, 0xbf3d349e
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v13, 0x3efcd89f
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, 0x3efcd89c
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v25, 0xbf523be3
+; GFX10-FLATSCR-PAL-NEXT: v_add_nc_u32_e32 v34, 0x200, v0
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, v0
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[26:29], off offset:304
; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:288
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[9:12], off offset:272
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0x3eae29dc
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, 0x3e319356
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0x3efcd89f
-; GFX10-FLATSCR-PAL-NEXT: v_add_nc_u32_e32 v39, 0x200, v0
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v35, v0
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[17:20], off offset:272
; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xb702e758
; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0xb7043519
; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, 0xbe31934f
; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0xbe319356
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, 0xbf20e7f5
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v13, 0x3eae29d8
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, 0x3e31934f
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, 0x3efcd89c
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, 0xbf638e39
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v22, v20
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v24, 0x3f20e7f5
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v25, v16
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v26, v23
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, 0x3f20e7f4
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v27, 0x3703c499
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v28, v14
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v29, v12
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v30, v18
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, 0x3eae29dc
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, 0x3eae29d8
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0x3e319356
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, 0x3e31934f
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v23, 0xbf638e39
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v24, v22
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[11:14], off offset:224
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, 0x3f20e7f5
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, v11
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v16, v25
; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:256
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[12:15], off offset:240
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:224
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[20:23], off offset:208
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:192
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, v17
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v11
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v9
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v31, 0xbf523be1
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v34, 0x3f3d349c
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v32, v7
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v33, v16
-; GFX10-FLATSCR-PAL-NEXT: scratch_load_dword v10, v35, off
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v35, v21
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v36, v5
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v37, v20
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v38, v6
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[4:7], off offset:832
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[27:30], off offset:816
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:800
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:784
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v33, 0xbf5f2ee2
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v34, v6
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v23, v18
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v26, v8
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, v3
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v27
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v14
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v12
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, v11
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, v9
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[35:38], off offset:768
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:752
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:736
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:720
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:704
-; GFX10-FLATSCR-PAL-NEXT: scratch_load_dword v0, v39, off
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[6:9], off offset:240
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[22:25], off offset:208
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[13:16], off offset:192
+; GFX10-FLATSCR-PAL-NEXT: scratch_load_dword v1, v5, off
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v30, 0xbf523be1
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v33, 0x3f3d349c
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v31, v28
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v32, v11
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, 0x3703c499
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v16, v29
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0x3f20e7f4
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, v6
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v13
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, v12
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v17
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, v3
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v24, v26
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v25, v22
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v26, v27
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[30:33], off offset:784
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v32, 0xbf5f2ee2
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v33, v27
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v7
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, v8
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[13:16], off offset:736
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v13, v19
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, v17
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[7:10], off offset:816
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], off offset:800
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:768
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[30:33], off offset:752
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[3:6], off offset:720
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[11:14], off offset:704
; GFX10-FLATSCR-PAL-NEXT: s_waitcnt vmcnt(0)
-; GFX10-FLATSCR-PAL-NEXT: v_add_f32_e32 v0, v10, v0
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:832
+; GFX10-FLATSCR-PAL-NEXT: scratch_load_dword v0, v34, off
+; GFX10-FLATSCR-PAL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-PAL-NEXT: v_add_f32_e32 v0, v1, v0
; GFX10-FLATSCR-PAL-NEXT: ; return to shader part epilog
;
; GFX11-FLATSCR-LABEL: hs_main:
; GFX11-FLATSCR: ; %bb.0:
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v1, 0xbf20e7f4 :: v_dual_lshlrev_b32 v0, 2, v0
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v28, 0x3703c499
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v19, 0x3efcd89f
; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v21, 0xbf5f2ee3
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v17, 0xbf3d349e
-; GFX11-FLATSCR-NEXT: v_and_b32_e32 v37, 0x1fc, v0
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xbeae29dc
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v1, 0xbf20e7f4 :: v_dual_lshlrev_b32 v0, 2, v0
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v28, 0x3f3d349e
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3eae29d8
+; GFX11-FLATSCR-NEXT: s_delay_alu instid0(VALU_DEP_4)
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v23, v21
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v27, 0x3f523be1
; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[1:4], off offset:320
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v23, v21 :: v_dual_mov_b32 v8, 0x3f3d349e
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3f523be1
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v24, 0xbf523be3
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v26, v17
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v12, 0xbe319356 :: v_dual_mov_b32 v31, v19
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v2, v8
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v6, 0x3f5f2ee2 :: v_dual_mov_b32 v3, v7
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v5, 0x3f638e37
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v15, 0x3e319356
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v2, v28
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v26, 0x3f5f2ee2
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v19, 0xbefcd89f :: v_dual_mov_b32 v30, v27
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v3, v27
; GFX11-FLATSCR-NEXT: s_delay_alu instid0(VALU_DEP_3)
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v27, v24 :: v_dual_mov_b32 v4, v6
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v25, 0x3f638e37 :: v_dual_mov_b32 v4, v26
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v24, 0xbf523be3
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v17, 0xbeae29dc
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v11, 0xbf3d349e
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v6, 0x3eae29dc
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v8, 0x3e319356
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v9, 0x3e31934f
; GFX11-FLATSCR-NEXT: s_clause 0x1
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[5:8], off offset:304
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[25:28], off offset:304
; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[1:4], off offset:288
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v2, 0xbefcd89f :: v_dual_mov_b32 v1, v0
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v13, 0x3eae29dc :: v_dual_mov_b32 v34, v5
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v3, 0xbefcd8a3
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v13, 0x3efcd89f :: v_dual_mov_b32 v16, v24
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v3, 0xbe319356
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3efcd89c
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v18, v17 :: v_dual_and_b32 v33, 0x1fc, v0
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v20, 0xbefcd8a3 :: v_dual_mov_b32 v15, v11
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v1, 0xb7043519 :: v_dual_mov_b32 v10, v13
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v2, 0xbe31934f :: v_dual_mov_b32 v31, v11
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xb702e758
; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v22, 0xbf638e39
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v25, 0x3f20e7f5
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v9, 0xb702e758
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v11, 0xbe31934f :: v_dual_mov_b32 v36, v6
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v10, 0xb7043519 :: v_dual_mov_b32 v29, v15
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v18, 0xbf20e7f5
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3eae29d8
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v16, 0x3e31934f
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v30, v13
-; GFX11-FLATSCR-NEXT: s_clause 0x1
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[0:3], off offset:272
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[9:12], off offset:256
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v1, 0x3f20e7f4
; GFX11-FLATSCR-NEXT: s_clause 0x1
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[6:9], off offset:240
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[11:14], off offset:224
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v14, 0x3f20e7f5 :: v_dual_mov_b32 v9, v6
+; GFX11-FLATSCR-NEXT: s_clause 0x3
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[17:20], off offset:272
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[0:3], off offset:256
; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[21:24], off offset:208
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[24:27], off offset:192
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v27, v8 :: v_dual_mov_b32 v20, 0x3efcd89c
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v33, v22
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v9, v18
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v10, v2 :: v_dual_mov_b32 v11, v0
-; GFX11-FLATSCR-NEXT: s_clause 0x1
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[13:16], off offset:240
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[17:20], off offset:224
-; GFX11-FLATSCR-NEXT: scratch_load_b32 v14, v37, off
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v32, 0x3f3d349c :: v_dual_mov_b32 v5, v15
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v20, v0
-; GFX11-FLATSCR-NEXT: s_clause 0x2
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[1:4], off offset:832
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[28:31], off offset:816
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[9:12], off offset:800
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v29, 0xbf523be1 :: v_dual_mov_b32 v30, v7
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v31, v17
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v3, v12
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v4, v28
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v24, v19 :: v_dual_mov_b32 v35, v21
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[13:16], off offset:192
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v4, 0x3f20e7f4
+; GFX11-FLATSCR-NEXT: scratch_load_b32 v0, v33, off
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v29, 0xbf523be1
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v32, 0x3f3d349c
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v24, v21
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[4:7], off offset:832
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v7, 0x3703c499 :: v_dual_mov_b32 v16, v28
; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[29:32], off offset:784
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v31, 0xbf5f2ee2 :: v_dual_mov_b32 v32, v6
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v6, v13 :: v_dual_mov_b32 v19, v2
-; GFX11-FLATSCR-NEXT: s_clause 0x4
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[33:36], off offset:768
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v31, 0xbf5f2ee2 :: v_dual_mov_b32 v32, v26
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v23, v25
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v25, v26 :: v_dual_mov_b32 v18, v12
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v20, v17 :: v_dual_mov_b32 v21, v3
+; GFX11-FLATSCR-NEXT: s_clause 0x1
; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[29:32], off offset:752
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[24:27], off offset:736
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[13:16], off offset:736
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v5, v8 :: v_dual_mov_b32 v14, v17
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v4, v7
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v13, v19
+; GFX11-FLATSCR-NEXT: s_clause 0x4
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[7:10], off offset:816
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[18:21], off offset:800
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[22:25], off offset:768
; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[3:6], off offset:720
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[17:20], off offset:704
-; GFX11-FLATSCR-NEXT: scratch_load_b32 v0, v37, off offset:512
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[11:14], off offset:704
+; GFX11-FLATSCR-NEXT: scratch_load_b32 v1, v33, off offset:512
; GFX11-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX11-FLATSCR-NEXT: v_add_f32_e32 v0, v14, v0
+; GFX11-FLATSCR-NEXT: v_add_f32_e32 v0, v0, v1
; GFX11-FLATSCR-NEXT: ; return to shader part epilog
%v1 = extractelement <81 x float> <float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0xBFEA477C60000000, float 0xBFEBE5DC60000000, float 0xBFEC71C720000000, float 0xBFEBE5DC60000000, float 0xBFEA477C60000000, float 0xBFE7A693C0000000, float 0xBFE41CFEA0000000, float 0x3FDF9B13E0000000, float 0x3FDF9B1380000000, float 0x3FD5C53B80000000, float 0x3FD5C53B00000000, float 0x3FC6326AC0000000, float 0x3FC63269E0000000, float 0xBEE05CEB00000000, float 0xBEE086A320000000, float 0xBFC63269E0000000, float 0xBFC6326AC0000000, float 0xBFD5C53B80000000, float 0xBFD5C53B80000000, float 0xBFDF9B13E0000000, float 0xBFDF9B1460000000, float 0xBFE41CFE80000000, float 0x3FE7A693C0000000, float 0x3FEA477C20000000, float 0x3FEBE5DC40000000, float 0x3FEC71C6E0000000, float 0x3FEBE5DC40000000, float 0x3FEA477C20000000, float 0x3FE7A693C0000000, float 0xBFE41CFE80000000>, i32 %idx
%v2 = extractelement <81 x float> <float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0x3FE7A693C0000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFEBE5DC40000000, float 0x3FEBE5DC40000000, float 0xBFEC71C720000000, float 0x3FEC71C6E0000000, float 0xBFEBE5DC60000000, float 0x3FEBE5DC40000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFE7A693C0000000, float 0x3FE7A69380000000, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFE80000000>, i32 %idx
@@ -4682,88 +4606,84 @@ define amdgpu_gs float @gs_main(i32 %idx) {
; GFX9-FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s0, s5
; GFX9-FLATSCR-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX9-FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s1, 0
-; GFX9-FLATSCR-NEXT: v_and_b32_e32 v23, 0x1fc, v0
+; GFX9-FLATSCR-NEXT: v_and_b32_e32 v30, 0x1fc, v0
; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xbf20e7f4
; GFX9-FLATSCR-NEXT: s_mov_b32 s0, 0
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, 0x3f523be1
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3f3d349e
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v12, 0x3eae29dc
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v13, 0x3eae29d8
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3e319356
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v15, 0x3e31934f
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v27, 0x3f5f2ee2
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v28, 0x3f523be1
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v29, 0x3f3d349e
; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:320
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[12:15], s0 offset:240
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v7
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, v6
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, v5
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v15, 0xbf3d349e
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v16, 0xbf20e7f5
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v17, 0x3efcd89f
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, 0x3efcd89c
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v17, 0xbeae29dc
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v29
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, v28
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, v27
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v11, 0xbf3d349e
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v13, 0x3efcd89f
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3efcd89c
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v22, 0xbf5f2ee3
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v25, 0xbf523be3
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v26, 0x3f638e37
; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:288
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, 0xb702e758
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xbeae29dc
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, 0xb7043519
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, 0xbe31934f
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v11, 0xbe319356
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[15:18], s0 offset:224
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, 0xbf523be3
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, 0xbf5f2ee3
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, 0x3f638e37
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, 0xbefcd89f
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, 0xbefcd8a3
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v0
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:256
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v19, 0xbf638e39
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v18
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, 0x3f20e7f5
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, v15
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v21
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[4:7], s0 offset:304
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:272
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:208
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:192
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v23
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, v18
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v4
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v22, v5
-; GFX9-FLATSCR-NEXT: scratch_load_dword v13, v1, off
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, 0x3f20e7f4
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[19:22], s0 offset:768
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, 0xbf523be1
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, 0x3f3d349c
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v19, v6
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v15
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], s0 offset:832
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:784
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v7
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee2
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, v5
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, v17
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, 0x3703c499
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, v14
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, v12
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, v17
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:752
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:736
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[3:6], s0 offset:816
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, v16
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, v2
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v0
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, v11
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, v3
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, v14
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, v12
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v17, v2
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, v0
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:800
-; GFX9-FLATSCR-NEXT: v_add_u32_e32 v1, 0x200, v23
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[4:7], s0 offset:720
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[15:18], s0 offset:704
-; GFX9-FLATSCR-NEXT: scratch_load_dword v0, v1, off
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v19, 0xbefcd89f
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, 0xbefcd8a3
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, v17
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xb702e758
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, 0xb7043519
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, 0xbe31934f
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, 0xbe319356
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, 0x3eae29dc
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3eae29d8
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, 0x3e319356
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, 0x3e31934f
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[11:14], s0 offset:224
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v23, 0xbf638e39
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v24, v22
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3f20e7f5
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v15, v11
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v16, v25
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[26:29], s0 offset:304
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[17:20], s0 offset:272
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:256
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s0 offset:240
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[22:25], s0 offset:208
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[13:16], s0 offset:192
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, v30
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3703c499
+; GFX9-FLATSCR-NEXT: scratch_load_dword v31, v0, off
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, v7
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, v8
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, 0x3f20e7f4
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, v3
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[3:6], s0 offset:720
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:832
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v16, v29
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xbf523be1
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, 0x3f3d349c
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v28
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, v11
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, v6
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v13
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v24, v26
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v25, v22
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v26, v27
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:784
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, v12
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, 0xbf5f2ee2
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, v27
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v17
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[13:16], s0 offset:736
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:816
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v13, v19
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v14, v17
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], s0 offset:768
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:752
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:800
+; GFX9-FLATSCR-NEXT: v_add_u32_e32 v0, 0x200, v30
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[11:14], s0 offset:704
+; GFX9-FLATSCR-NEXT: scratch_load_dword v0, v0, off
; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: v_add_f32_e32 v0, v13, v0
+; GFX9-FLATSCR-NEXT: v_add_f32_e32 v0, v31, v0
; GFX9-FLATSCR-NEXT: ; return to shader part epilog
;
; GFX10-FLATSCR-LABEL: gs_main:
@@ -4772,89 +4692,85 @@ define amdgpu_gs float @gs_main(i32 %idx) {
; GFX10-FLATSCR-NEXT: s_addc_u32 s1, s1, 0
; GFX10-FLATSCR-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
; GFX10-FLATSCR-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v6, 0x3f5f2ee2
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v8, 0x3f3d349e
; GFX10-FLATSCR-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3f523be1
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v9, 0xbeae29dc
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v5, 0x3f638e37
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:320
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, v8
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v27, 0x3f5f2ee2
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v29, 0x3f3d349e
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v28, 0x3f523be1
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v17, 0xbeae29dc
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v26, 0x3f638e37
; GFX10-FLATSCR-NEXT: v_and_b32_e32 v0, 0x1fc, v0
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v3, v7
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v4, v6
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v11, 0xbefcd89f
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v12, 0xbefcd8a3
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v10, v9
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v16, 0xbf3d349e
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee3
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v23, 0xbf523be3
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[5:8], off offset:304
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:320
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, v29
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v3, v28
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v4, v27
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v22, 0xbf5f2ee3
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v19, 0xbefcd89f
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v20, 0xbefcd8a3
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v18, v17
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v11, 0xbf3d349e
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v13, 0x3efcd89f
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3efcd89c
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v25, 0xbf523be3
+; GFX10-FLATSCR-NEXT: v_add_nc_u32_e32 v34, 0x200, v0
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v5, v0
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[26:29], off offset:304
; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:288
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[9:12], off offset:272
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v12, 0x3eae29dc
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3e319356
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v18, 0x3efcd89f
-; GFX10-FLATSCR-NEXT: v_add_nc_u32_e32 v39, 0x200, v0
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v35, v0
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[17:20], off offset:272
; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xb702e758
; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, 0xb7043519
; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, 0xbe31934f
; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v3, 0xbe319356
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v17, 0xbf20e7f5
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v13, 0x3eae29d8
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v15, 0x3e31934f
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v19, 0x3efcd89c
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v21, 0xbf638e39
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v22, v20
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v24, 0x3f20e7f5
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v25, v16
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v26, v23
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v4, 0x3f20e7f4
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v27, 0x3703c499
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v28, v14
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v29, v12
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v30, v18
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v6, 0x3eae29dc
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3eae29d8
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v8, 0x3e319356
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v9, 0x3e31934f
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v23, 0xbf638e39
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v24, v22
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[11:14], off offset:224
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3f20e7f5
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v15, v11
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v16, v25
; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:256
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[12:15], off offset:240
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:224
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], off offset:208
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:192
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v0, v17
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, v11
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, v9
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v31, 0xbf523be1
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v34, 0x3f3d349c
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v32, v7
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v33, v16
-; GFX10-FLATSCR-NEXT: scratch_load_dword v10, v35, off
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v35, v21
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v36, v5
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v37, v20
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v38, v6
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[4:7], off offset:832
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[27:30], off offset:816
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:800
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:784
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v33, 0xbf5f2ee2
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v34, v6
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v23, v18
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v26, v8
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v0, v3
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, v27
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, v14
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v3, v12
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v18, v11
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v19, v9
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[35:38], off offset:768
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:752
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:736
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:720
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:704
-; GFX10-FLATSCR-NEXT: scratch_load_dword v0, v39, off
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], off offset:240
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[22:25], off offset:208
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[13:16], off offset:192
+; GFX10-FLATSCR-NEXT: scratch_load_dword v1, v5, off
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v30, 0xbf523be1
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v33, 0x3f3d349c
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v31, v28
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v32, v11
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3703c499
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v16, v29
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v0, 0x3f20e7f4
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v9, v6
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v10, v13
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v18, v12
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v20, v17
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v21, v3
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v24, v26
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v25, v22
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v26, v27
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[30:33], off offset:784
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v32, 0xbf5f2ee2
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v33, v27
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v4, v7
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v5, v8
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[13:16], off offset:736
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v13, v19
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v14, v17
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[7:10], off offset:816
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], off offset:800
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:768
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[30:33], off offset:752
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[3:6], off offset:720
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[11:14], off offset:704
; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX10-FLATSCR-NEXT: v_add_f32_e32 v0, v10, v0
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:832
+; GFX10-FLATSCR-NEXT: scratch_load_dword v0, v34, off
+; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT: v_add_f32_e32 v0, v1, v0
; GFX10-FLATSCR-NEXT: ; return to shader part epilog
;
; GFX9-FLATSCR-PAL-LABEL: gs_main:
@@ -4863,92 +4779,88 @@ define amdgpu_gs float @gs_main(i32 %idx) {
; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s0, s8
; GFX9-FLATSCR-PAL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
; GFX9-FLATSCR-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-FLATSCR-PAL-NEXT: v_and_b32_e32 v23, 0x1fc, v0
+; GFX9-FLATSCR-PAL-NEXT: v_and_b32_e32 v30, 0x1fc, v0
; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xbf20e7f4
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v27, 0x3f5f2ee2
; GFX9-FLATSCR-PAL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-FLATSCR-PAL-NEXT: s_and_b32 s1, s1, 0xffff
; GFX9-FLATSCR-PAL-NEXT: s_add_u32 flat_scratch_lo, s0, s5
; GFX9-FLATSCR-PAL-NEXT: s_addc_u32 flat_scratch_hi, s1, 0
; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s0, 0
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, 0x3f523be1
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, 0x3f3d349e
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0x3eae29dc
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v13, 0x3eae29d8
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, 0x3e319356
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, 0x3e31934f
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v28, 0x3f523be1
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v29, 0x3f3d349e
; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:320
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[12:15], s0 offset:240
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v7
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v6
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v5
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, 0xbf3d349e
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v16, 0xbf20e7f5
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, 0x3efcd89f
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0x3efcd89c
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, 0xbeae29dc
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v29
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v28
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v27
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v11, 0xbf3d349e
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v13, 0x3efcd89f
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, 0x3efcd89c
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v22, 0xbf5f2ee3
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v25, 0xbf523be3
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v26, 0x3f638e37
; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:288
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0xb702e758
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xbeae29dc
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, 0xb7043519
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, 0xbe31934f
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v11, 0xbe319356
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[15:18], s0 offset:224
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, 0xbf523be3
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0xbf5f2ee3
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, 0x3f638e37
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, 0xbefcd89f
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0xbefcd8a3
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v0
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:256
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, 0xbf638e39
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v18
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0x3f20e7f5
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, v15
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v21
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[4:7], s0 offset:304
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:272
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:208
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:192
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v23
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, v18
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v4
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v22, v5
-; GFX9-FLATSCR-PAL-NEXT: scratch_load_dword v13, v1, off
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0x3f20e7f4
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[19:22], s0 offset:768
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0xbf523be1
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, 0x3f3d349c
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, v6
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v15
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], s0 offset:832
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:784
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v7
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee2
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, v5
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, v17
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0x3703c499
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v14
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, v12
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, v17
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:752
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:736
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[3:6], s0 offset:816
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, v16
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, v2
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v0
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v11
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, v3
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, v14
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, v12
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, v2
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, v0
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:800
-; GFX9-FLATSCR-PAL-NEXT: v_add_u32_e32 v1, 0x200, v23
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[4:7], s0 offset:720
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[15:18], s0 offset:704
-; GFX9-FLATSCR-PAL-NEXT: scratch_load_dword v0, v1, off
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, 0xbefcd89f
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, 0xbefcd8a3
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, v17
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xb702e758
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0xb7043519
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, 0xbe31934f
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0xbe319356
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, 0x3eae29dc
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, 0x3eae29d8
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0x3e319356
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, 0x3e31934f
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[11:14], s0 offset:224
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v23, 0xbf638e39
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v24, v22
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, 0x3f20e7f5
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, v11
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v16, v25
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[26:29], s0 offset:304
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[17:20], s0 offset:272
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:256
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[6:9], s0 offset:240
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[22:25], s0 offset:208
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[13:16], s0 offset:192
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, v30
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, 0x3703c499
+; GFX9-FLATSCR-PAL-NEXT: scratch_load_dword v31, v0, off
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v7
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, v8
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0x3f20e7f4
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, v3
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[3:6], s0 offset:720
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:832
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v16, v29
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xbf523be1
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0x3f3d349c
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v28
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v11
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, v6
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v13
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v24, v26
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v25, v22
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v26, v27
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:784
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, v12
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, 0xbf5f2ee2
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v27
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v17
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[13:16], s0 offset:736
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:816
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v13, v19
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, v17
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[23:26], s0 offset:768
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:752
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:800
+; GFX9-FLATSCR-PAL-NEXT: v_add_u32_e32 v0, 0x200, v30
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[11:14], s0 offset:704
+; GFX9-FLATSCR-PAL-NEXT: scratch_load_dword v0, v0, off
; GFX9-FLATSCR-PAL-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-PAL-NEXT: v_add_f32_e32 v0, v13, v0
+; GFX9-FLATSCR-PAL-NEXT: v_add_f32_e32 v0, v31, v0
; GFX9-FLATSCR-PAL-NEXT: ; return to shader part epilog
;
; GFX10-FLATSCR-PAL-LABEL: gs_main:
@@ -4962,165 +4874,158 @@ define amdgpu_gs float @gs_main(i32 %idx) {
; GFX10-FLATSCR-PAL-NEXT: s_addc_u32 s1, s1, 0
; GFX10-FLATSCR-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
; GFX10-FLATSCR-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, 0x3f5f2ee2
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0x3f3d349e
; GFX10-FLATSCR-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, 0x3f523be1
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, 0xbeae29dc
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, 0x3f638e37
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:320
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v8
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v27, 0x3f5f2ee2
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v29, 0x3f3d349e
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v28, 0x3f523be1
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, 0xbeae29dc
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v26, 0x3f638e37
; GFX10-FLATSCR-PAL-NEXT: v_and_b32_e32 v0, 0x1fc, v0
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v7
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v6
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v11, 0xbefcd89f
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0xbefcd8a3
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v9
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v16, 0xbf3d349e
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee3
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v23, 0xbf523be3
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[5:8], off offset:304
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:320
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v29
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v28
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v27
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v22, 0xbf5f2ee3
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, 0xbefcd89f
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, 0xbefcd8a3
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, v17
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v11, 0xbf3d349e
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v13, 0x3efcd89f
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, 0x3efcd89c
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v25, 0xbf523be3
+; GFX10-FLATSCR-PAL-NEXT: v_add_nc_u32_e32 v34, 0x200, v0
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, v0
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[26:29], off offset:304
; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:288
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[9:12], off offset:272
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0x3eae29dc
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, 0x3e319356
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0x3efcd89f
-; GFX10-FLATSCR-PAL-NEXT: v_add_nc_u32_e32 v39, 0x200, v0
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v35, v0
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[17:20], off offset:272
; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xb702e758
; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0xb7043519
; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, 0xbe31934f
; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0xbe319356
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, 0xbf20e7f5
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v13, 0x3eae29d8
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, 0x3e31934f
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, 0x3efcd89c
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, 0xbf638e39
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v22, v20
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v24, 0x3f20e7f5
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v25, v16
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v26, v23
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, 0x3f20e7f4
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v27, 0x3703c499
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v28, v14
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v29, v12
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v30, v18
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, 0x3eae29dc
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, 0x3eae29d8
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0x3e319356
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, 0x3e31934f
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v23, 0xbf638e39
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v24, v22
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[11:14], off offset:224
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, 0x3f20e7f5
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, v11
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v16, v25
; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:256
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[12:15], off offset:240
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:224
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[20:23], off offset:208
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:192
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, v17
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v11
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v9
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v31, 0xbf523be1
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v34, 0x3f3d349c
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v32, v7
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v33, v16
-; GFX10-FLATSCR-PAL-NEXT: scratch_load_dword v10, v35, off
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v35, v21
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v36, v5
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v37, v20
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v38, v6
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[4:7], off offset:832
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[27:30], off offset:816
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:800
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:784
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v33, 0xbf5f2ee2
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v34, v6
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v23, v18
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v26, v8
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, v3
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v27
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v14
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v12
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, v11
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, v9
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[35:38], off offset:768
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:752
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:736
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:720
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:704
-; GFX10-FLATSCR-PAL-NEXT: scratch_load_dword v0, v39, off
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[6:9], off offset:240
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[22:25], off offset:208
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[13:16], off offset:192
+; GFX10-FLATSCR-PAL-NEXT: scratch_load_dword v1, v5, off
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v30, 0xbf523be1
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v33, 0x3f3d349c
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v31, v28
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v32, v11
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, 0x3703c499
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v16, v29
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0x3f20e7f4
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, v6
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v13
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, v12
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v17
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, v3
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v24, v26
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v25, v22
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v26, v27
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[30:33], off offset:784
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v32, 0xbf5f2ee2
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v33, v27
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v7
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, v8
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[13:16], off offset:736
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v13, v19
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, v17
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[7:10], off offset:816
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], off offset:800
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:768
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[30:33], off offset:752
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[3:6], off offset:720
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[11:14], off offset:704
; GFX10-FLATSCR-PAL-NEXT: s_waitcnt vmcnt(0)
-; GFX10-FLATSCR-PAL-NEXT: v_add_f32_e32 v0, v10, v0
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:832
+; GFX10-FLATSCR-PAL-NEXT: scratch_load_dword v0, v34, off
+; GFX10-FLATSCR-PAL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-PAL-NEXT: v_add_f32_e32 v0, v1, v0
; GFX10-FLATSCR-PAL-NEXT: ; return to shader part epilog
;
; GFX11-FLATSCR-LABEL: gs_main:
; GFX11-FLATSCR: ; %bb.0:
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v1, 0xbf20e7f4 :: v_dual_lshlrev_b32 v0, 2, v0
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v28, 0x3703c499
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v19, 0x3efcd89f
; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v21, 0xbf5f2ee3
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v17, 0xbf3d349e
-; GFX11-FLATSCR-NEXT: v_and_b32_e32 v37, 0x1fc, v0
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xbeae29dc
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v1, 0xbf20e7f4 :: v_dual_lshlrev_b32 v0, 2, v0
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v28, 0x3f3d349e
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3eae29d8
+; GFX11-FLATSCR-NEXT: s_delay_alu instid0(VALU_DEP_4)
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v23, v21
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v27, 0x3f523be1
; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[1:4], off offset:320
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v23, v21 :: v_dual_mov_b32 v8, 0x3f3d349e
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3f523be1
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v24, 0xbf523be3
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v26, v17
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v12, 0xbe319356 :: v_dual_mov_b32 v31, v19
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v2, v8
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v6, 0x3f5f2ee2 :: v_dual_mov_b32 v3, v7
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v5, 0x3f638e37
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v15, 0x3e319356
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v2, v28
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v26, 0x3f5f2ee2
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v19, 0xbefcd89f :: v_dual_mov_b32 v30, v27
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v3, v27
; GFX11-FLATSCR-NEXT: s_delay_alu instid0(VALU_DEP_3)
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v27, v24 :: v_dual_mov_b32 v4, v6
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v25, 0x3f638e37 :: v_dual_mov_b32 v4, v26
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v24, 0xbf523be3
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v17, 0xbeae29dc
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v11, 0xbf3d349e
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v6, 0x3eae29dc
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v8, 0x3e319356
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v9, 0x3e31934f
; GFX11-FLATSCR-NEXT: s_clause 0x1
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[5:8], off offset:304
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[25:28], off offset:304
; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[1:4], off offset:288
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v2, 0xbefcd89f :: v_dual_mov_b32 v1, v0
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v13, 0x3eae29dc :: v_dual_mov_b32 v34, v5
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v3, 0xbefcd8a3
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v13, 0x3efcd89f :: v_dual_mov_b32 v16, v24
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v3, 0xbe319356
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3efcd89c
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v18, v17 :: v_dual_and_b32 v33, 0x1fc, v0
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v20, 0xbefcd8a3 :: v_dual_mov_b32 v15, v11
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v1, 0xb7043519 :: v_dual_mov_b32 v10, v13
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v2, 0xbe31934f :: v_dual_mov_b32 v31, v11
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xb702e758
; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v22, 0xbf638e39
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v25, 0x3f20e7f5
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v9, 0xb702e758
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v11, 0xbe31934f :: v_dual_mov_b32 v36, v6
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v10, 0xb7043519 :: v_dual_mov_b32 v29, v15
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v18, 0xbf20e7f5
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3eae29d8
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v16, 0x3e31934f
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v30, v13
-; GFX11-FLATSCR-NEXT: s_clause 0x1
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[0:3], off offset:272
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[9:12], off offset:256
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v1, 0x3f20e7f4
; GFX11-FLATSCR-NEXT: s_clause 0x1
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[6:9], off offset:240
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[11:14], off offset:224
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v14, 0x3f20e7f5 :: v_dual_mov_b32 v9, v6
+; GFX11-FLATSCR-NEXT: s_clause 0x3
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[17:20], off offset:272
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[0:3], off offset:256
; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[21:24], off offset:208
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[24:27], off offset:192
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v27, v8 :: v_dual_mov_b32 v20, 0x3efcd89c
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v33, v22
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v9, v18
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v10, v2 :: v_dual_mov_b32 v11, v0
-; GFX11-FLATSCR-NEXT: s_clause 0x1
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[13:16], off offset:240
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[17:20], off offset:224
-; GFX11-FLATSCR-NEXT: scratch_load_b32 v14, v37, off
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v32, 0x3f3d349c :: v_dual_mov_b32 v5, v15
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v20, v0
-; GFX11-FLATSCR-NEXT: s_clause 0x2
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[1:4], off offset:832
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[28:31], off offset:816
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[9:12], off offset:800
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v29, 0xbf523be1 :: v_dual_mov_b32 v30, v7
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v31, v17
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v3, v12
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v4, v28
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v24, v19 :: v_dual_mov_b32 v35, v21
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[13:16], off offset:192
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v4, 0x3f20e7f4
+; GFX11-FLATSCR-NEXT: scratch_load_b32 v0, v33, off
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v29, 0xbf523be1
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v32, 0x3f3d349c
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v24, v21
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[4:7], off offset:832
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v7, 0x3703c499 :: v_dual_mov_b32 v16, v28
; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[29:32], off offset:784
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v31, 0xbf5f2ee2 :: v_dual_mov_b32 v32, v6
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v6, v13 :: v_dual_mov_b32 v19, v2
-; GFX11-FLATSCR-NEXT: s_clause 0x4
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[33:36], off offset:768
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v31, 0xbf5f2ee2 :: v_dual_mov_b32 v32, v26
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v23, v25
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v25, v26 :: v_dual_mov_b32 v18, v12
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v20, v17 :: v_dual_mov_b32 v21, v3
+; GFX11-FLATSCR-NEXT: s_clause 0x1
; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[29:32], off offset:752
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[24:27], off offset:736
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[13:16], off offset:736
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v5, v8 :: v_dual_mov_b32 v14, v17
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v4, v7
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v13, v19
+; GFX11-FLATSCR-NEXT: s_clause 0x4
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[7:10], off offset:816
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[18:21], off offset:800
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[22:25], off offset:768
; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[3:6], off offset:720
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[17:20], off offset:704
-; GFX11-FLATSCR-NEXT: scratch_load_b32 v0, v37, off offset:512
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[11:14], off offset:704
+; GFX11-FLATSCR-NEXT: scratch_load_b32 v1, v33, off offset:512
; GFX11-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX11-FLATSCR-NEXT: v_add_f32_e32 v0, v14, v0
+; GFX11-FLATSCR-NEXT: v_add_f32_e32 v0, v0, v1
; GFX11-FLATSCR-NEXT: ; return to shader part epilog
%v1 = extractelement <81 x float> <float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0xBFEA477C60000000, float 0xBFEBE5DC60000000, float 0xBFEC71C720000000, float 0xBFEBE5DC60000000, float 0xBFEA477C60000000, float 0xBFE7A693C0000000, float 0xBFE41CFEA0000000, float 0x3FDF9B13E0000000, float 0x3FDF9B1380000000, float 0x3FD5C53B80000000, float 0x3FD5C53B00000000, float 0x3FC6326AC0000000, float 0x3FC63269E0000000, float 0xBEE05CEB00000000, float 0xBEE086A320000000, float 0xBFC63269E0000000, float 0xBFC6326AC0000000, float 0xBFD5C53B80000000, float 0xBFD5C53B80000000, float 0xBFDF9B13E0000000, float 0xBFDF9B1460000000, float 0xBFE41CFE80000000, float 0x3FE7A693C0000000, float 0x3FEA477C20000000, float 0x3FEBE5DC40000000, float 0x3FEC71C6E0000000, float 0x3FEBE5DC40000000, float 0x3FEA477C20000000, float 0x3FE7A693C0000000, float 0xBFE41CFE80000000>, i32 %idx
%v2 = extractelement <81 x float> <float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0x3FE7A693C0000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFEBE5DC40000000, float 0x3FEBE5DC40000000, float 0xBFEC71C720000000, float 0x3FEC71C6E0000000, float 0xBFEBE5DC60000000, float 0x3FEBE5DC40000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFE7A693C0000000, float 0x3FE7A69380000000, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFE80000000>, i32 %idx
@@ -5711,89 +5616,85 @@ define amdgpu_hs <{i32, i32, i32, float}> @hs_ir_uses_scratch_offset(i32 inreg,
; GFX9-FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s0, s5
; GFX9-FLATSCR-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX9-FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s1, 0
-; GFX9-FLATSCR-NEXT: v_and_b32_e32 v23, 0x1fc, v0
+; GFX9-FLATSCR-NEXT: v_and_b32_e32 v30, 0x1fc, v0
; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xbf20e7f4
; GFX9-FLATSCR-NEXT: s_mov_b32 s0, 0
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, 0x3f523be1
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3f3d349e
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v12, 0x3eae29dc
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v13, 0x3eae29d8
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3e319356
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v15, 0x3e31934f
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v27, 0x3f5f2ee2
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v28, 0x3f523be1
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v29, 0x3f3d349e
; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:320
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[12:15], s0 offset:240
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v7
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, v6
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, v5
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v15, 0xbf3d349e
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v16, 0xbf20e7f5
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v17, 0x3efcd89f
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, 0x3efcd89c
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v17, 0xbeae29dc
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v29
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, v28
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, v27
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v11, 0xbf3d349e
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v13, 0x3efcd89f
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3efcd89c
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v22, 0xbf5f2ee3
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v25, 0xbf523be3
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v26, 0x3f638e37
; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:288
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, 0xb702e758
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xbeae29dc
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, 0xb7043519
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, 0xbe31934f
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v11, 0xbe319356
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[15:18], s0 offset:224
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, 0xbf523be3
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, 0xbf5f2ee3
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, 0x3f638e37
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, 0xbefcd89f
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, 0xbefcd8a3
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v0
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:256
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v19, 0xbf638e39
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v18
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, 0x3f20e7f5
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, v15
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v21
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[4:7], s0 offset:304
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:272
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:208
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:192
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v23
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, v18
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v4
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v22, v5
-; GFX9-FLATSCR-NEXT: scratch_load_dword v13, v1, off
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, 0x3f20e7f4
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[19:22], s0 offset:768
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, 0xbf523be1
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, 0x3f3d349c
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v19, v6
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v15
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], s0 offset:832
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:784
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v7
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee2
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, v5
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, v17
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, 0x3703c499
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, v14
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, v12
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, v17
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:752
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:736
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[3:6], s0 offset:816
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, v16
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, v2
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v0
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, v11
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, v3
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, v14
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, v12
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v17, v2
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, v0
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:800
-; GFX9-FLATSCR-NEXT: v_add_u32_e32 v1, 0x200, v23
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[4:7], s0 offset:720
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[15:18], s0 offset:704
-; GFX9-FLATSCR-NEXT: scratch_load_dword v0, v1, off
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v19, 0xbefcd89f
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, 0xbefcd8a3
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, v17
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xb702e758
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, 0xb7043519
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, 0xbe31934f
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, 0xbe319356
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, 0x3eae29dc
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3eae29d8
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, 0x3e319356
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, 0x3e31934f
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[11:14], s0 offset:224
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v23, 0xbf638e39
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v24, v22
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3f20e7f5
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v15, v11
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v16, v25
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[26:29], s0 offset:304
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[17:20], s0 offset:272
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:256
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s0 offset:240
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[22:25], s0 offset:208
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[13:16], s0 offset:192
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, v30
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3703c499
+; GFX9-FLATSCR-NEXT: scratch_load_dword v31, v0, off
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, v7
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, v8
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, 0x3f20e7f4
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, v3
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[3:6], s0 offset:720
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:832
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v16, v29
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xbf523be1
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, 0x3f3d349c
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v28
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, v11
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, v6
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v13
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v24, v26
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v25, v22
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v26, v27
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:784
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, v12
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, 0xbf5f2ee2
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, v27
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v17
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[13:16], s0 offset:736
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:816
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v13, v19
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v14, v17
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], s0 offset:768
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:752
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:800
+; GFX9-FLATSCR-NEXT: v_add_u32_e32 v0, 0x200, v30
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[11:14], s0 offset:704
+; GFX9-FLATSCR-NEXT: scratch_load_dword v0, v0, off
; GFX9-FLATSCR-NEXT: s_mov_b32 s2, s7
; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: v_add_f32_e32 v0, v13, v0
+; GFX9-FLATSCR-NEXT: v_add_f32_e32 v0, v31, v0
; GFX9-FLATSCR-NEXT: ; return to shader part epilog
;
; GFX10-FLATSCR-LABEL: hs_ir_uses_scratch_offset:
@@ -5802,90 +5703,86 @@ define amdgpu_hs <{i32, i32, i32, float}> @hs_ir_uses_scratch_offset(i32 inreg,
; GFX10-FLATSCR-NEXT: s_addc_u32 s1, s1, 0
; GFX10-FLATSCR-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
; GFX10-FLATSCR-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v8, 0x3f3d349e
; GFX10-FLATSCR-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v6, 0x3f5f2ee2
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3f523be1
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v9, 0xbeae29dc
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v5, 0x3f638e37
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:320
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, v8
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v29, 0x3f3d349e
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v27, 0x3f5f2ee2
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v28, 0x3f523be1
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v17, 0xbeae29dc
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v26, 0x3f638e37
; GFX10-FLATSCR-NEXT: v_and_b32_e32 v0, 0x1fc, v0
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v3, v7
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v4, v6
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v11, 0xbefcd89f
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v12, 0xbefcd8a3
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v10, v9
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v16, 0xbf3d349e
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee3
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v23, 0xbf523be3
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[5:8], off offset:304
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:320
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, v29
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v3, v28
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v4, v27
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v22, 0xbf5f2ee3
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v19, 0xbefcd89f
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v20, 0xbefcd8a3
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v18, v17
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v11, 0xbf3d349e
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v13, 0x3efcd89f
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3efcd89c
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v25, 0xbf523be3
+; GFX10-FLATSCR-NEXT: v_add_nc_u32_e32 v34, 0x200, v0
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v5, v0
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[26:29], off offset:304
; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:288
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[9:12], off offset:272
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v12, 0x3eae29dc
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3e319356
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v18, 0x3efcd89f
-; GFX10-FLATSCR-NEXT: v_add_nc_u32_e32 v39, 0x200, v0
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v35, v0
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[17:20], off offset:272
; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xb702e758
; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, 0xb7043519
; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, 0xbe31934f
; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v3, 0xbe319356
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v17, 0xbf20e7f5
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v13, 0x3eae29d8
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v15, 0x3e31934f
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v19, 0x3efcd89c
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v21, 0xbf638e39
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v22, v20
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v24, 0x3f20e7f5
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v25, v16
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v26, v23
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v4, 0x3f20e7f4
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v27, 0x3703c499
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v28, v14
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v29, v12
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v30, v18
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v6, 0x3eae29dc
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3eae29d8
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v8, 0x3e319356
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v9, 0x3e31934f
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v23, 0xbf638e39
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v24, v22
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[11:14], off offset:224
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3f20e7f5
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v15, v11
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v16, v25
; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:256
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[12:15], off offset:240
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:224
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], off offset:208
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:192
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v0, v17
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, v11
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, v9
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v31, 0xbf523be1
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v34, 0x3f3d349c
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v32, v7
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v33, v16
-; GFX10-FLATSCR-NEXT: scratch_load_dword v10, v35, off
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v35, v21
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v36, v5
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v37, v20
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v38, v6
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[4:7], off offset:832
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[27:30], off offset:816
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:800
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:784
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v33, 0xbf5f2ee2
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v34, v6
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v23, v18
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v26, v8
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v0, v3
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, v27
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, v14
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v3, v12
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v18, v11
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v19, v9
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[35:38], off offset:768
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:752
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:736
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:720
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:704
-; GFX10-FLATSCR-NEXT: scratch_load_dword v0, v39, off
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], off offset:240
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[22:25], off offset:208
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[13:16], off offset:192
+; GFX10-FLATSCR-NEXT: scratch_load_dword v1, v5, off
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v30, 0xbf523be1
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v33, 0x3f3d349c
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v31, v28
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v32, v11
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3703c499
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v16, v29
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v0, 0x3f20e7f4
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v9, v6
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v10, v13
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v18, v12
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v20, v17
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v21, v3
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v24, v26
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v25, v22
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v26, v27
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[30:33], off offset:784
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v32, 0xbf5f2ee2
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v33, v27
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v4, v7
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v5, v8
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[13:16], off offset:736
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v13, v19
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v14, v17
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[7:10], off offset:816
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], off offset:800
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:768
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[30:33], off offset:752
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[3:6], off offset:720
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[11:14], off offset:704
; GFX10-FLATSCR-NEXT: s_mov_b32 s2, s7
; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX10-FLATSCR-NEXT: v_add_f32_e32 v0, v10, v0
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:832
+; GFX10-FLATSCR-NEXT: scratch_load_dword v0, v34, off
+; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT: v_add_f32_e32 v0, v1, v0
; GFX10-FLATSCR-NEXT: ; return to shader part epilog
;
; GFX9-FLATSCR-PAL-LABEL: hs_ir_uses_scratch_offset:
@@ -5894,93 +5791,89 @@ define amdgpu_hs <{i32, i32, i32, float}> @hs_ir_uses_scratch_offset(i32 inreg,
; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s0, s8
; GFX9-FLATSCR-PAL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
; GFX9-FLATSCR-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-FLATSCR-PAL-NEXT: v_and_b32_e32 v23, 0x1fc, v0
+; GFX9-FLATSCR-PAL-NEXT: v_and_b32_e32 v30, 0x1fc, v0
; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xbf20e7f4
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v27, 0x3f5f2ee2
; GFX9-FLATSCR-PAL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-FLATSCR-PAL-NEXT: s_and_b32 s1, s1, 0xffff
; GFX9-FLATSCR-PAL-NEXT: s_add_u32 flat_scratch_lo, s0, s5
; GFX9-FLATSCR-PAL-NEXT: s_addc_u32 flat_scratch_hi, s1, 0
; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s0, 0
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, 0x3f523be1
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, 0x3f3d349e
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0x3eae29dc
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v13, 0x3eae29d8
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, 0x3e319356
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, 0x3e31934f
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v28, 0x3f523be1
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v29, 0x3f3d349e
; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:320
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[12:15], s0 offset:240
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v7
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v6
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v5
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, 0xbf3d349e
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v16, 0xbf20e7f5
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, 0x3efcd89f
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0x3efcd89c
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, 0xbeae29dc
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v29
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v28
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v27
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v11, 0xbf3d349e
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v13, 0x3efcd89f
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, 0x3efcd89c
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v22, 0xbf5f2ee3
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v25, 0xbf523be3
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v26, 0x3f638e37
; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:288
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0xb702e758
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xbeae29dc
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, 0xb7043519
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, 0xbe31934f
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v11, 0xbe319356
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[15:18], s0 offset:224
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, 0xbf523be3
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0xbf5f2ee3
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, 0x3f638e37
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, 0xbefcd89f
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0xbefcd8a3
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v0
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:256
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, 0xbf638e39
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v18
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0x3f20e7f5
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, v15
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v21
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[4:7], s0 offset:304
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:272
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:208
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:192
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v23
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, v18
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v4
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v22, v5
-; GFX9-FLATSCR-PAL-NEXT: scratch_load_dword v13, v1, off
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0x3f20e7f4
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[19:22], s0 offset:768
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0xbf523be1
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, 0x3f3d349c
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, v6
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v15
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], s0 offset:832
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:784
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v7
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee2
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, v5
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, v17
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0x3703c499
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v14
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, v12
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, v17
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:752
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:736
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[3:6], s0 offset:816
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, v16
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, v2
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v0
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v11
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, v3
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, v14
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, v12
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, v2
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, v0
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:800
-; GFX9-FLATSCR-PAL-NEXT: v_add_u32_e32 v1, 0x200, v23
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[4:7], s0 offset:720
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[15:18], s0 offset:704
-; GFX9-FLATSCR-PAL-NEXT: scratch_load_dword v0, v1, off
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, 0xbefcd89f
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, 0xbefcd8a3
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, v17
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xb702e758
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0xb7043519
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, 0xbe31934f
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0xbe319356
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, 0x3eae29dc
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, 0x3eae29d8
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0x3e319356
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, 0x3e31934f
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[11:14], s0 offset:224
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v23, 0xbf638e39
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v24, v22
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, 0x3f20e7f5
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, v11
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v16, v25
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[26:29], s0 offset:304
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[17:20], s0 offset:272
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:256
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[6:9], s0 offset:240
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[22:25], s0 offset:208
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[13:16], s0 offset:192
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, v30
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, 0x3703c499
+; GFX9-FLATSCR-PAL-NEXT: scratch_load_dword v31, v0, off
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v7
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, v8
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0x3f20e7f4
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, v3
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[3:6], s0 offset:720
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:832
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v16, v29
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xbf523be1
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0x3f3d349c
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v28
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v11
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, v6
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v13
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v24, v26
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v25, v22
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v26, v27
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:784
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, v12
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, 0xbf5f2ee2
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v27
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v17
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[13:16], s0 offset:736
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:816
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v13, v19
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, v17
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[23:26], s0 offset:768
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:752
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:800
+; GFX9-FLATSCR-PAL-NEXT: v_add_u32_e32 v0, 0x200, v30
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[11:14], s0 offset:704
+; GFX9-FLATSCR-PAL-NEXT: scratch_load_dword v0, v0, off
; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s2, s5
; GFX9-FLATSCR-PAL-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-PAL-NEXT: v_add_f32_e32 v0, v13, v0
+; GFX9-FLATSCR-PAL-NEXT: v_add_f32_e32 v0, v31, v0
; GFX9-FLATSCR-PAL-NEXT: ; return to shader part epilog
;
; GFX10-FLATSCR-PAL-LABEL: hs_ir_uses_scratch_offset:
@@ -5994,167 +5887,158 @@ define amdgpu_hs <{i32, i32, i32, float}> @hs_ir_uses_scratch_offset(i32 inreg,
; GFX10-FLATSCR-PAL-NEXT: s_addc_u32 s1, s1, 0
; GFX10-FLATSCR-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
; GFX10-FLATSCR-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0x3f3d349e
; GFX10-FLATSCR-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, 0x3f5f2ee2
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, 0x3f523be1
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, 0xbeae29dc
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, 0x3f638e37
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:320
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v8
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v29, 0x3f3d349e
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v27, 0x3f5f2ee2
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v28, 0x3f523be1
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, 0xbeae29dc
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v26, 0x3f638e37
; GFX10-FLATSCR-PAL-NEXT: v_and_b32_e32 v0, 0x1fc, v0
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v7
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v6
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v11, 0xbefcd89f
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0xbefcd8a3
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v9
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v16, 0xbf3d349e
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee3
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v23, 0xbf523be3
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[5:8], off offset:304
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:320
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v29
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v28
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v27
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v22, 0xbf5f2ee3
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, 0xbefcd89f
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, 0xbefcd8a3
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, v17
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v11, 0xbf3d349e
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v13, 0x3efcd89f
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, 0x3efcd89c
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v25, 0xbf523be3
+; GFX10-FLATSCR-PAL-NEXT: v_add_nc_u32_e32 v34, 0x200, v0
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, v0
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[26:29], off offset:304
; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:288
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[9:12], off offset:272
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0x3eae29dc
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, 0x3e319356
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0x3efcd89f
-; GFX10-FLATSCR-PAL-NEXT: v_add_nc_u32_e32 v39, 0x200, v0
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v35, v0
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[17:20], off offset:272
; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xb702e758
; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0xb7043519
; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, 0xbe31934f
; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0xbe319356
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, 0xbf20e7f5
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v13, 0x3eae29d8
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, 0x3e31934f
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, 0x3efcd89c
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, 0xbf638e39
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v22, v20
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v24, 0x3f20e7f5
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v25, v16
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v26, v23
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, 0x3f20e7f4
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v27, 0x3703c499
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v28, v14
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v29, v12
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v30, v18
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, 0x3eae29dc
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, 0x3eae29d8
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0x3e319356
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, 0x3e31934f
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v23, 0xbf638e39
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v24, v22
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[11:14], off offset:224
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, 0x3f20e7f5
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, v11
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v16, v25
; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:256
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[12:15], off offset:240
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:224
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[20:23], off offset:208
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:192
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, v17
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v11
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v9
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v31, 0xbf523be1
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v34, 0x3f3d349c
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v32, v7
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v33, v16
-; GFX10-FLATSCR-PAL-NEXT: scratch_load_dword v10, v35, off
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v35, v21
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v36, v5
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v37, v20
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v38, v6
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[4:7], off offset:832
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[27:30], off offset:816
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:800
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:784
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v33, 0xbf5f2ee2
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v34, v6
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v23, v18
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v26, v8
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, v3
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v27
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v14
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v12
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, v11
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, v9
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[35:38], off offset:768
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:752
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:736
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:720
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:704
-; GFX10-FLATSCR-PAL-NEXT: scratch_load_dword v0, v39, off
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[6:9], off offset:240
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[22:25], off offset:208
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[13:16], off offset:192
+; GFX10-FLATSCR-PAL-NEXT: scratch_load_dword v1, v5, off
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v30, 0xbf523be1
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v33, 0x3f3d349c
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v31, v28
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v32, v11
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, 0x3703c499
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v16, v29
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0x3f20e7f4
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, v6
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v13
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, v12
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v17
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, v3
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v24, v26
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v25, v22
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v26, v27
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[30:33], off offset:784
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v32, 0xbf5f2ee2
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v33, v27
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v7
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, v8
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[13:16], off offset:736
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v13, v19
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, v17
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[7:10], off offset:816
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], off offset:800
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:768
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[30:33], off offset:752
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[3:6], off offset:720
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[11:14], off offset:704
; GFX10-FLATSCR-PAL-NEXT: s_mov_b32 s2, s5
; GFX10-FLATSCR-PAL-NEXT: s_waitcnt vmcnt(0)
-; GFX10-FLATSCR-PAL-NEXT: v_add_f32_e32 v0, v10, v0
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:832
+; GFX10-FLATSCR-PAL-NEXT: scratch_load_dword v0, v34, off
+; GFX10-FLATSCR-PAL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-PAL-NEXT: v_add_f32_e32 v0, v1, v0
; GFX10-FLATSCR-PAL-NEXT: ; return to shader part epilog
;
; GFX11-FLATSCR-LABEL: hs_ir_uses_scratch_offset:
; GFX11-FLATSCR: ; %bb.0:
; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v1, 0xbf20e7f4 :: v_dual_lshlrev_b32 v0, 2, v0
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v28, 0x3703c499
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v19, 0x3efcd89f
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v28, 0x3f3d349e
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v27, 0x3f523be1
; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v21, 0xbf5f2ee3
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v8, 0x3f3d349e
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3f523be1
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v17, 0xbf3d349e
-; GFX11-FLATSCR-NEXT: v_and_b32_e32 v37, 0x1fc, v0
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v0, 0xbeae29dc :: v_dual_mov_b32 v23, v21
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v6, 0x3f5f2ee2
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v5, 0x3f638e37
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v24, 0xbf523be3
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v26, 0x3f5f2ee2
; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[1:4], off offset:320
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v2, v8 :: v_dual_mov_b32 v3, v7
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v4, v6
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v2, v28 :: v_dual_mov_b32 v19, 0xbefcd89f
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v30, v27
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v24, 0xbf523be3 :: v_dual_mov_b32 v23, v21
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v25, 0x3f638e37 :: v_dual_mov_b32 v4, v26
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v3, v27
+; GFX11-FLATSCR-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v17, 0xbeae29dc :: v_dual_mov_b32 v16, v24
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v11, 0xbf3d349e
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v6, 0x3eae29dc
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3eae29d8
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v8, 0x3e319356
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v9, 0x3e31934f
; GFX11-FLATSCR-NEXT: s_clause 0x1
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[5:8], off offset:304
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[25:28], off offset:304
; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[1:4], off offset:288
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v2, 0xbefcd89f :: v_dual_mov_b32 v27, v24
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v1, v0
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v13, 0x3eae29dc :: v_dual_mov_b32 v34, v5
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v3, 0xbefcd8a3
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v15, 0x3e319356 :: v_dual_mov_b32 v36, v6
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v13, 0x3efcd89f
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v3, 0xbe319356
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3efcd89c
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v18, v17 :: v_dual_and_b32 v33, 0x1fc, v0
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v20, 0xbefcd8a3 :: v_dual_mov_b32 v15, v11
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v1, 0xb7043519 :: v_dual_mov_b32 v10, v13
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v2, 0xbe31934f :: v_dual_mov_b32 v31, v11
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xb702e758
; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v22, 0xbf638e39
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v25, 0x3f20e7f5 :: v_dual_mov_b32 v26, v17
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v9, 0xb702e758
-; GFX11-FLATSCR-NEXT: s_delay_alu instid0(VALU_DEP_4)
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v10, 0xb7043519 :: v_dual_mov_b32 v29, v15
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v11, 0xbe31934f
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v12, 0xbe319356 :: v_dual_mov_b32 v31, v19
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v18, 0xbf20e7f5
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3eae29d8
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v16, 0x3e31934f
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v30, v13
-; GFX11-FLATSCR-NEXT: s_clause 0x1
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[0:3], off offset:272
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[9:12], off offset:256
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v1, 0x3f20e7f4
; GFX11-FLATSCR-NEXT: s_clause 0x1
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[6:9], off offset:240
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[11:14], off offset:224
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v14, 0x3f20e7f5 :: v_dual_mov_b32 v9, v6
+; GFX11-FLATSCR-NEXT: s_clause 0x3
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[17:20], off offset:272
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[0:3], off offset:256
; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[21:24], off offset:208
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[24:27], off offset:192
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v27, v8 :: v_dual_mov_b32 v20, 0x3efcd89c
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v33, v22
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v9, v18
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v10, v2 :: v_dual_mov_b32 v11, v0
-; GFX11-FLATSCR-NEXT: s_clause 0x1
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[13:16], off offset:240
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[17:20], off offset:224
-; GFX11-FLATSCR-NEXT: scratch_load_b32 v14, v37, off
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v32, 0x3f3d349c :: v_dual_mov_b32 v5, v15
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v20, v0
-; GFX11-FLATSCR-NEXT: s_clause 0x2
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[1:4], off offset:832
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[28:31], off offset:816
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[9:12], off offset:800
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v29, 0xbf523be1 :: v_dual_mov_b32 v30, v7
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v31, v17
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v3, v12
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v4, v28
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v24, v19 :: v_dual_mov_b32 v35, v21
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[13:16], off offset:192
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v4, 0x3f20e7f4
+; GFX11-FLATSCR-NEXT: scratch_load_b32 v0, v33, off
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v29, 0xbf523be1
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v32, 0x3f3d349c
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v24, v21
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[4:7], off offset:832
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v7, 0x3703c499 :: v_dual_mov_b32 v16, v28
; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[29:32], off offset:784
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v31, 0xbf5f2ee2 :: v_dual_mov_b32 v32, v6
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v6, v13 :: v_dual_mov_b32 v19, v2
-; GFX11-FLATSCR-NEXT: s_clause 0x4
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[33:36], off offset:768
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v31, 0xbf5f2ee2 :: v_dual_mov_b32 v32, v26
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v23, v25
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v25, v26 :: v_dual_mov_b32 v18, v12
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v20, v17 :: v_dual_mov_b32 v21, v3
+; GFX11-FLATSCR-NEXT: s_clause 0x1
; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[29:32], off offset:752
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[24:27], off offset:736
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[13:16], off offset:736
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v5, v8 :: v_dual_mov_b32 v14, v17
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v4, v7
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v13, v19
+; GFX11-FLATSCR-NEXT: s_clause 0x4
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[7:10], off offset:816
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[18:21], off offset:800
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[22:25], off offset:768
; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[3:6], off offset:720
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[17:20], off offset:704
-; GFX11-FLATSCR-NEXT: scratch_load_b32 v0, v37, off offset:512
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[11:14], off offset:704
+; GFX11-FLATSCR-NEXT: scratch_load_b32 v1, v33, off offset:512
; GFX11-FLATSCR-NEXT: s_mov_b32 s2, s5
; GFX11-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX11-FLATSCR-NEXT: v_add_f32_e32 v0, v14, v0
+; GFX11-FLATSCR-NEXT: v_add_f32_e32 v0, v0, v1
; GFX11-FLATSCR-NEXT: ; return to shader part epilog
%v1 = extractelement <81 x float> <float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0xBFEA477C60000000, float 0xBFEBE5DC60000000, float 0xBFEC71C720000000, float 0xBFEBE5DC60000000, float 0xBFEA477C60000000, float 0xBFE7A693C0000000, float 0xBFE41CFEA0000000, float 0x3FDF9B13E0000000, float 0x3FDF9B1380000000, float 0x3FD5C53B80000000, float 0x3FD5C53B00000000, float 0x3FC6326AC0000000, float 0x3FC63269E0000000, float 0xBEE05CEB00000000, float 0xBEE086A320000000, float 0xBFC63269E0000000, float 0xBFC6326AC0000000, float 0xBFD5C53B80000000, float 0xBFD5C53B80000000, float 0xBFDF9B13E0000000, float 0xBFDF9B1460000000, float 0xBFE41CFE80000000, float 0x3FE7A693C0000000, float 0x3FEA477C20000000, float 0x3FEBE5DC40000000, float 0x3FEC71C6E0000000, float 0x3FEBE5DC40000000, float 0x3FEA477C20000000, float 0x3FE7A693C0000000, float 0xBFE41CFE80000000>, i32 %idx
%v2 = extractelement <81 x float> <float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0x3FE7A693C0000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFEBE5DC40000000, float 0x3FEBE5DC40000000, float 0xBFEC71C720000000, float 0x3FEC71C6E0000000, float 0xBFEBE5DC60000000, float 0x3FEBE5DC40000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFE7A693C0000000, float 0x3FE7A69380000000, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFE80000000>, i32 %idx
@@ -6743,89 +6627,85 @@ define amdgpu_gs <{i32, i32, i32, float}> @gs_ir_uses_scratch_offset(i32 inreg,
; GFX9-FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s0, s5
; GFX9-FLATSCR-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX9-FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s1, 0
-; GFX9-FLATSCR-NEXT: v_and_b32_e32 v23, 0x1fc, v0
+; GFX9-FLATSCR-NEXT: v_and_b32_e32 v30, 0x1fc, v0
; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xbf20e7f4
; GFX9-FLATSCR-NEXT: s_mov_b32 s0, 0
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, 0x3f523be1
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3f3d349e
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v12, 0x3eae29dc
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v13, 0x3eae29d8
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3e319356
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v15, 0x3e31934f
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v27, 0x3f5f2ee2
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v28, 0x3f523be1
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v29, 0x3f3d349e
; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:320
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[12:15], s0 offset:240
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v7
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, v6
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, v5
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v15, 0xbf3d349e
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v16, 0xbf20e7f5
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v17, 0x3efcd89f
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, 0x3efcd89c
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v17, 0xbeae29dc
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v29
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, v28
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, v27
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v11, 0xbf3d349e
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v13, 0x3efcd89f
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3efcd89c
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v22, 0xbf5f2ee3
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v25, 0xbf523be3
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v26, 0x3f638e37
; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:288
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, 0xb702e758
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xbeae29dc
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, 0xb7043519
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, 0xbe31934f
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v11, 0xbe319356
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[15:18], s0 offset:224
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, 0xbf523be3
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, 0xbf5f2ee3
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, 0x3f638e37
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, 0xbefcd89f
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, 0xbefcd8a3
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v0
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:256
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v19, 0xbf638e39
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v18
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, 0x3f20e7f5
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, v15
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v21
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[4:7], s0 offset:304
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:272
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:208
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:192
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v23
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, v18
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v4
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v22, v5
-; GFX9-FLATSCR-NEXT: scratch_load_dword v13, v1, off
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, 0x3f20e7f4
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[19:22], s0 offset:768
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, 0xbf523be1
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, 0x3f3d349c
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v19, v6
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v15
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], s0 offset:832
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:784
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v7
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee2
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, v5
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, v17
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, 0x3703c499
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, v14
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, v12
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, v17
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:752
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:736
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[3:6], s0 offset:816
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, v16
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, v2
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v0
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, v11
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, v3
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, v14
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, v12
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v17, v2
-; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, v0
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:800
-; GFX9-FLATSCR-NEXT: v_add_u32_e32 v1, 0x200, v23
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[4:7], s0 offset:720
-; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[15:18], s0 offset:704
-; GFX9-FLATSCR-NEXT: scratch_load_dword v0, v1, off
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v19, 0xbefcd89f
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, 0xbefcd8a3
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, v17
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xb702e758
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, 0xb7043519
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, 0xbe31934f
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, 0xbe319356
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, 0x3eae29dc
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3eae29d8
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, 0x3e319356
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, 0x3e31934f
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[11:14], s0 offset:224
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v23, 0xbf638e39
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v24, v22
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3f20e7f5
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v15, v11
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v16, v25
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[26:29], s0 offset:304
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[17:20], s0 offset:272
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:256
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s0 offset:240
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[22:25], s0 offset:208
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[13:16], s0 offset:192
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, v30
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3703c499
+; GFX9-FLATSCR-NEXT: scratch_load_dword v31, v0, off
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, v7
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, v8
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, 0x3f20e7f4
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, v3
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[3:6], s0 offset:720
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:832
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v16, v29
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xbf523be1
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, 0x3f3d349c
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v28
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, v11
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, v6
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v13
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v24, v26
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v25, v22
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v26, v27
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:784
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, v12
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, 0xbf5f2ee2
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, v27
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v17
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[13:16], s0 offset:736
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:816
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v13, v19
+; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v14, v17
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], s0 offset:768
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:752
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:800
+; GFX9-FLATSCR-NEXT: v_add_u32_e32 v0, 0x200, v30
+; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[11:14], s0 offset:704
+; GFX9-FLATSCR-NEXT: scratch_load_dword v0, v0, off
; GFX9-FLATSCR-NEXT: s_mov_b32 s2, s7
; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-NEXT: v_add_f32_e32 v0, v13, v0
+; GFX9-FLATSCR-NEXT: v_add_f32_e32 v0, v31, v0
; GFX9-FLATSCR-NEXT: ; return to shader part epilog
;
; GFX10-FLATSCR-LABEL: gs_ir_uses_scratch_offset:
@@ -6834,90 +6714,86 @@ define amdgpu_gs <{i32, i32, i32, float}> @gs_ir_uses_scratch_offset(i32 inreg,
; GFX10-FLATSCR-NEXT: s_addc_u32 s1, s1, 0
; GFX10-FLATSCR-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
; GFX10-FLATSCR-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v8, 0x3f3d349e
; GFX10-FLATSCR-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v6, 0x3f5f2ee2
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3f523be1
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v9, 0xbeae29dc
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v5, 0x3f638e37
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:320
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, v8
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v29, 0x3f3d349e
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v27, 0x3f5f2ee2
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v28, 0x3f523be1
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v17, 0xbeae29dc
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v26, 0x3f638e37
; GFX10-FLATSCR-NEXT: v_and_b32_e32 v0, 0x1fc, v0
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v3, v7
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v4, v6
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v11, 0xbefcd89f
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v12, 0xbefcd8a3
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v10, v9
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v16, 0xbf3d349e
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee3
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v23, 0xbf523be3
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[5:8], off offset:304
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:320
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, v29
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v3, v28
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v4, v27
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v22, 0xbf5f2ee3
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v19, 0xbefcd89f
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v20, 0xbefcd8a3
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v18, v17
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v11, 0xbf3d349e
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v13, 0x3efcd89f
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3efcd89c
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v25, 0xbf523be3
+; GFX10-FLATSCR-NEXT: v_add_nc_u32_e32 v34, 0x200, v0
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v5, v0
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[26:29], off offset:304
; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:288
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[9:12], off offset:272
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v12, 0x3eae29dc
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3e319356
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v18, 0x3efcd89f
-; GFX10-FLATSCR-NEXT: v_add_nc_u32_e32 v39, 0x200, v0
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v35, v0
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[17:20], off offset:272
; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xb702e758
; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, 0xb7043519
; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, 0xbe31934f
; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v3, 0xbe319356
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v17, 0xbf20e7f5
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v13, 0x3eae29d8
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v15, 0x3e31934f
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v19, 0x3efcd89c
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v21, 0xbf638e39
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v22, v20
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v24, 0x3f20e7f5
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v25, v16
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v26, v23
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v4, 0x3f20e7f4
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v27, 0x3703c499
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v28, v14
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v29, v12
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v30, v18
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v6, 0x3eae29dc
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3eae29d8
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v8, 0x3e319356
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v9, 0x3e31934f
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v23, 0xbf638e39
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v24, v22
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[11:14], off offset:224
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3f20e7f5
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v15, v11
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v16, v25
; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:256
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[12:15], off offset:240
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:224
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], off offset:208
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:192
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v0, v17
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, v11
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, v9
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v31, 0xbf523be1
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v34, 0x3f3d349c
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v32, v7
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v33, v16
-; GFX10-FLATSCR-NEXT: scratch_load_dword v10, v35, off
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v35, v21
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v36, v5
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v37, v20
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v38, v6
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[4:7], off offset:832
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[27:30], off offset:816
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:800
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:784
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v33, 0xbf5f2ee2
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v34, v6
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v23, v18
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v26, v8
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v0, v3
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, v27
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, v14
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v3, v12
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v18, v11
-; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v19, v9
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[35:38], off offset:768
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:752
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:736
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:720
-; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:704
-; GFX10-FLATSCR-NEXT: scratch_load_dword v0, v39, off
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], off offset:240
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[22:25], off offset:208
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[13:16], off offset:192
+; GFX10-FLATSCR-NEXT: scratch_load_dword v1, v5, off
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v30, 0xbf523be1
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v33, 0x3f3d349c
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v31, v28
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v32, v11
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3703c499
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v16, v29
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v0, 0x3f20e7f4
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v9, v6
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v10, v13
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v18, v12
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v20, v17
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v21, v3
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v24, v26
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v25, v22
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v26, v27
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[30:33], off offset:784
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v32, 0xbf5f2ee2
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v33, v27
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v4, v7
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v5, v8
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[13:16], off offset:736
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v13, v19
+; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v14, v17
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[7:10], off offset:816
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], off offset:800
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:768
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[30:33], off offset:752
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[3:6], off offset:720
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[11:14], off offset:704
; GFX10-FLATSCR-NEXT: s_mov_b32 s2, s7
; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX10-FLATSCR-NEXT: v_add_f32_e32 v0, v10, v0
+; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:832
+; GFX10-FLATSCR-NEXT: scratch_load_dword v0, v34, off
+; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-NEXT: v_add_f32_e32 v0, v1, v0
; GFX10-FLATSCR-NEXT: ; return to shader part epilog
;
; GFX9-FLATSCR-PAL-LABEL: gs_ir_uses_scratch_offset:
@@ -6926,93 +6802,89 @@ define amdgpu_gs <{i32, i32, i32, float}> @gs_ir_uses_scratch_offset(i32 inreg,
; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s0, s8
; GFX9-FLATSCR-PAL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
; GFX9-FLATSCR-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-FLATSCR-PAL-NEXT: v_and_b32_e32 v23, 0x1fc, v0
+; GFX9-FLATSCR-PAL-NEXT: v_and_b32_e32 v30, 0x1fc, v0
; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xbf20e7f4
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v27, 0x3f5f2ee2
; GFX9-FLATSCR-PAL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-FLATSCR-PAL-NEXT: s_and_b32 s1, s1, 0xffff
; GFX9-FLATSCR-PAL-NEXT: s_add_u32 flat_scratch_lo, s0, s5
; GFX9-FLATSCR-PAL-NEXT: s_addc_u32 flat_scratch_hi, s1, 0
; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s0, 0
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, 0x3f523be1
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, 0x3f3d349e
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0x3eae29dc
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v13, 0x3eae29d8
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, 0x3e319356
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, 0x3e31934f
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v28, 0x3f523be1
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v29, 0x3f3d349e
; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:320
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[12:15], s0 offset:240
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v7
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v6
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v5
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, 0xbf3d349e
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v16, 0xbf20e7f5
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, 0x3efcd89f
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0x3efcd89c
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, 0xbeae29dc
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v29
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v28
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v27
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v11, 0xbf3d349e
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v13, 0x3efcd89f
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, 0x3efcd89c
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v22, 0xbf5f2ee3
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v25, 0xbf523be3
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v26, 0x3f638e37
; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:288
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0xb702e758
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xbeae29dc
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, 0xb7043519
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, 0xbe31934f
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v11, 0xbe319356
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[15:18], s0 offset:224
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, 0xbf523be3
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0xbf5f2ee3
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, 0x3f638e37
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, 0xbefcd89f
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0xbefcd8a3
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v0
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:256
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, 0xbf638e39
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v18
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0x3f20e7f5
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, v15
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v21
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[4:7], s0 offset:304
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:272
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:208
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:192
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v23
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, v18
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v4
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v22, v5
-; GFX9-FLATSCR-PAL-NEXT: scratch_load_dword v13, v1, off
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0x3f20e7f4
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[19:22], s0 offset:768
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0xbf523be1
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, 0x3f3d349c
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, v6
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v15
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], s0 offset:832
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:784
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v7
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee2
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, v5
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, v17
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0x3703c499
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v14
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, v12
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, v17
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:752
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:736
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[3:6], s0 offset:816
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, v16
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, v2
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v0
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v11
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, v3
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, v14
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, v12
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, v2
-; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, v0
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:800
-; GFX9-FLATSCR-PAL-NEXT: v_add_u32_e32 v1, 0x200, v23
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[4:7], s0 offset:720
-; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[15:18], s0 offset:704
-; GFX9-FLATSCR-PAL-NEXT: scratch_load_dword v0, v1, off
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, 0xbefcd89f
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, 0xbefcd8a3
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, v17
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xb702e758
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0xb7043519
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, 0xbe31934f
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0xbe319356
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, 0x3eae29dc
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, 0x3eae29d8
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0x3e319356
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, 0x3e31934f
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[11:14], s0 offset:224
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v23, 0xbf638e39
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v24, v22
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, 0x3f20e7f5
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, v11
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v16, v25
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[26:29], s0 offset:304
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[17:20], s0 offset:272
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:256
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[6:9], s0 offset:240
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[22:25], s0 offset:208
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[13:16], s0 offset:192
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, v30
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, 0x3703c499
+; GFX9-FLATSCR-PAL-NEXT: scratch_load_dword v31, v0, off
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v7
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, v8
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0x3f20e7f4
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, v3
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[3:6], s0 offset:720
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:832
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v16, v29
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xbf523be1
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0x3f3d349c
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v28
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v11
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, v6
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v13
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v24, v26
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v25, v22
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v26, v27
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:784
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, v12
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, 0xbf5f2ee2
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v27
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v17
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[13:16], s0 offset:736
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[7:10], s0 offset:816
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v13, v19
+; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, v17
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[23:26], s0 offset:768
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:752
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], s0 offset:800
+; GFX9-FLATSCR-PAL-NEXT: v_add_u32_e32 v0, 0x200, v30
+; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[11:14], s0 offset:704
+; GFX9-FLATSCR-PAL-NEXT: scratch_load_dword v0, v0, off
; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s2, s5
; GFX9-FLATSCR-PAL-NEXT: s_waitcnt vmcnt(0)
-; GFX9-FLATSCR-PAL-NEXT: v_add_f32_e32 v0, v13, v0
+; GFX9-FLATSCR-PAL-NEXT: v_add_f32_e32 v0, v31, v0
; GFX9-FLATSCR-PAL-NEXT: ; return to shader part epilog
;
; GFX10-FLATSCR-PAL-LABEL: gs_ir_uses_scratch_offset:
@@ -7026,167 +6898,158 @@ define amdgpu_gs <{i32, i32, i32, float}> @gs_ir_uses_scratch_offset(i32 inreg,
; GFX10-FLATSCR-PAL-NEXT: s_addc_u32 s1, s1, 0
; GFX10-FLATSCR-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
; GFX10-FLATSCR-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0x3f3d349e
; GFX10-FLATSCR-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, 0x3f5f2ee2
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, 0x3f523be1
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, 0xbeae29dc
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, 0x3f638e37
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:320
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v8
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v29, 0x3f3d349e
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v27, 0x3f5f2ee2
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v28, 0x3f523be1
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, 0xbeae29dc
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v26, 0x3f638e37
; GFX10-FLATSCR-PAL-NEXT: v_and_b32_e32 v0, 0x1fc, v0
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v7
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v6
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v11, 0xbefcd89f
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0xbefcd8a3
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v9
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v16, 0xbf3d349e
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee3
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v23, 0xbf523be3
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[5:8], off offset:304
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:320
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v29
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v28
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v27
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v22, 0xbf5f2ee3
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, 0xbefcd89f
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, 0xbefcd8a3
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, v17
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v11, 0xbf3d349e
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v13, 0x3efcd89f
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, 0x3efcd89c
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v25, 0xbf523be3
+; GFX10-FLATSCR-PAL-NEXT: v_add_nc_u32_e32 v34, 0x200, v0
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, v0
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[26:29], off offset:304
; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:288
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[9:12], off offset:272
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0x3eae29dc
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, 0x3e319356
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0x3efcd89f
-; GFX10-FLATSCR-PAL-NEXT: v_add_nc_u32_e32 v39, 0x200, v0
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v35, v0
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[17:20], off offset:272
; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xb702e758
; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0xb7043519
; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, 0xbe31934f
; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0xbe319356
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, 0xbf20e7f5
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v13, 0x3eae29d8
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, 0x3e31934f
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, 0x3efcd89c
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, 0xbf638e39
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v22, v20
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v24, 0x3f20e7f5
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v25, v16
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v26, v23
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, 0x3f20e7f4
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v27, 0x3703c499
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v28, v14
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v29, v12
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v30, v18
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, 0x3eae29dc
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, 0x3eae29d8
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0x3e319356
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, 0x3e31934f
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v23, 0xbf638e39
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v24, v22
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[11:14], off offset:224
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, 0x3f20e7f5
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, v11
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v16, v25
; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:256
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[12:15], off offset:240
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:224
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[20:23], off offset:208
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:192
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, v17
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v11
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v9
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v31, 0xbf523be1
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v34, 0x3f3d349c
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v32, v7
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v33, v16
-; GFX10-FLATSCR-PAL-NEXT: scratch_load_dword v10, v35, off
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v35, v21
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v36, v5
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v37, v20
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v38, v6
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[4:7], off offset:832
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[27:30], off offset:816
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:800
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:784
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v33, 0xbf5f2ee2
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v34, v6
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v23, v18
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v26, v8
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, v3
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v27
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v14
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v12
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, v11
-; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, v9
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[35:38], off offset:768
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:752
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:736
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:720
-; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:704
-; GFX10-FLATSCR-PAL-NEXT: scratch_load_dword v0, v39, off
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[6:9], off offset:240
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[22:25], off offset:208
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[13:16], off offset:192
+; GFX10-FLATSCR-PAL-NEXT: scratch_load_dword v1, v5, off
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v30, 0xbf523be1
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v33, 0x3f3d349c
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v31, v28
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v32, v11
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, 0x3703c499
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v16, v29
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0x3f20e7f4
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, v6
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v13
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, v12
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v17
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, v3
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v24, v26
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v25, v22
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v26, v27
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[30:33], off offset:784
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v32, 0xbf5f2ee2
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v33, v27
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v7
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, v8
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[13:16], off offset:736
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v13, v19
+; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, v17
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[7:10], off offset:816
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], off offset:800
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:768
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[30:33], off offset:752
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[3:6], off offset:720
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[11:14], off offset:704
; GFX10-FLATSCR-PAL-NEXT: s_mov_b32 s2, s5
; GFX10-FLATSCR-PAL-NEXT: s_waitcnt vmcnt(0)
-; GFX10-FLATSCR-PAL-NEXT: v_add_f32_e32 v0, v10, v0
+; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:832
+; GFX10-FLATSCR-PAL-NEXT: scratch_load_dword v0, v34, off
+; GFX10-FLATSCR-PAL-NEXT: s_waitcnt vmcnt(0)
+; GFX10-FLATSCR-PAL-NEXT: v_add_f32_e32 v0, v1, v0
; GFX10-FLATSCR-PAL-NEXT: ; return to shader part epilog
;
; GFX11-FLATSCR-LABEL: gs_ir_uses_scratch_offset:
; GFX11-FLATSCR: ; %bb.0:
; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v1, 0xbf20e7f4 :: v_dual_lshlrev_b32 v0, 2, v0
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v28, 0x3703c499
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v19, 0x3efcd89f
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v28, 0x3f3d349e
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v27, 0x3f523be1
; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v21, 0xbf5f2ee3
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v8, 0x3f3d349e
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3f523be1
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v17, 0xbf3d349e
-; GFX11-FLATSCR-NEXT: v_and_b32_e32 v37, 0x1fc, v0
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v0, 0xbeae29dc :: v_dual_mov_b32 v23, v21
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v6, 0x3f5f2ee2
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v5, 0x3f638e37
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v24, 0xbf523be3
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v26, 0x3f5f2ee2
; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[1:4], off offset:320
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v2, v8 :: v_dual_mov_b32 v3, v7
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v4, v6
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v2, v28 :: v_dual_mov_b32 v19, 0xbefcd89f
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v30, v27
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v24, 0xbf523be3 :: v_dual_mov_b32 v23, v21
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v25, 0x3f638e37 :: v_dual_mov_b32 v4, v26
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v3, v27
+; GFX11-FLATSCR-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v17, 0xbeae29dc :: v_dual_mov_b32 v16, v24
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v11, 0xbf3d349e
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v6, 0x3eae29dc
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3eae29d8
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v8, 0x3e319356
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v9, 0x3e31934f
; GFX11-FLATSCR-NEXT: s_clause 0x1
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[5:8], off offset:304
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[25:28], off offset:304
; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[1:4], off offset:288
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v2, 0xbefcd89f :: v_dual_mov_b32 v27, v24
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v1, v0
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v13, 0x3eae29dc :: v_dual_mov_b32 v34, v5
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v3, 0xbefcd8a3
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v15, 0x3e319356 :: v_dual_mov_b32 v36, v6
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v13, 0x3efcd89f
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v3, 0xbe319356
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3efcd89c
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v18, v17 :: v_dual_and_b32 v33, 0x1fc, v0
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v20, 0xbefcd8a3 :: v_dual_mov_b32 v15, v11
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v1, 0xb7043519 :: v_dual_mov_b32 v10, v13
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v2, 0xbe31934f :: v_dual_mov_b32 v31, v11
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xb702e758
; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v22, 0xbf638e39
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v25, 0x3f20e7f5 :: v_dual_mov_b32 v26, v17
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v9, 0xb702e758
-; GFX11-FLATSCR-NEXT: s_delay_alu instid0(VALU_DEP_4)
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v10, 0xb7043519 :: v_dual_mov_b32 v29, v15
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v11, 0xbe31934f
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v12, 0xbe319356 :: v_dual_mov_b32 v31, v19
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v18, 0xbf20e7f5
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3eae29d8
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v16, 0x3e31934f
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v30, v13
-; GFX11-FLATSCR-NEXT: s_clause 0x1
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[0:3], off offset:272
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[9:12], off offset:256
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v1, 0x3f20e7f4
; GFX11-FLATSCR-NEXT: s_clause 0x1
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[6:9], off offset:240
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[11:14], off offset:224
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v14, 0x3f20e7f5 :: v_dual_mov_b32 v9, v6
+; GFX11-FLATSCR-NEXT: s_clause 0x3
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[17:20], off offset:272
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[0:3], off offset:256
; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[21:24], off offset:208
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[24:27], off offset:192
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v27, v8 :: v_dual_mov_b32 v20, 0x3efcd89c
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v33, v22
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v9, v18
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v10, v2 :: v_dual_mov_b32 v11, v0
-; GFX11-FLATSCR-NEXT: s_clause 0x1
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[13:16], off offset:240
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[17:20], off offset:224
-; GFX11-FLATSCR-NEXT: scratch_load_b32 v14, v37, off
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v32, 0x3f3d349c :: v_dual_mov_b32 v5, v15
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v20, v0
-; GFX11-FLATSCR-NEXT: s_clause 0x2
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[1:4], off offset:832
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[28:31], off offset:816
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[9:12], off offset:800
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v29, 0xbf523be1 :: v_dual_mov_b32 v30, v7
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v31, v17
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v3, v12
-; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v4, v28
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v24, v19 :: v_dual_mov_b32 v35, v21
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[13:16], off offset:192
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v4, 0x3f20e7f4
+; GFX11-FLATSCR-NEXT: scratch_load_b32 v0, v33, off
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v29, 0xbf523be1
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v32, 0x3f3d349c
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v24, v21
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[4:7], off offset:832
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v7, 0x3703c499 :: v_dual_mov_b32 v16, v28
; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[29:32], off offset:784
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v31, 0xbf5f2ee2 :: v_dual_mov_b32 v32, v6
-; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v6, v13 :: v_dual_mov_b32 v19, v2
-; GFX11-FLATSCR-NEXT: s_clause 0x4
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[33:36], off offset:768
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v31, 0xbf5f2ee2 :: v_dual_mov_b32 v32, v26
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v23, v25
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v25, v26 :: v_dual_mov_b32 v18, v12
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v20, v17 :: v_dual_mov_b32 v21, v3
+; GFX11-FLATSCR-NEXT: s_clause 0x1
; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[29:32], off offset:752
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[24:27], off offset:736
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[13:16], off offset:736
+; GFX11-FLATSCR-NEXT: v_dual_mov_b32 v5, v8 :: v_dual_mov_b32 v14, v17
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v4, v7
+; GFX11-FLATSCR-NEXT: v_mov_b32_e32 v13, v19
+; GFX11-FLATSCR-NEXT: s_clause 0x4
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[7:10], off offset:816
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[18:21], off offset:800
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[22:25], off offset:768
; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[3:6], off offset:720
-; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[17:20], off offset:704
-; GFX11-FLATSCR-NEXT: scratch_load_b32 v0, v37, off offset:512
+; GFX11-FLATSCR-NEXT: scratch_store_b128 off, v[11:14], off offset:704
+; GFX11-FLATSCR-NEXT: scratch_load_b32 v1, v33, off offset:512
; GFX11-FLATSCR-NEXT: s_mov_b32 s2, s5
; GFX11-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX11-FLATSCR-NEXT: v_add_f32_e32 v0, v14, v0
+; GFX11-FLATSCR-NEXT: v_add_f32_e32 v0, v0, v1
; GFX11-FLATSCR-NEXT: ; return to shader part epilog
%v1 = extractelement <81 x float> <float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0xBFEA477C60000000, float 0xBFEBE5DC60000000, float 0xBFEC71C720000000, float 0xBFEBE5DC60000000, float 0xBFEA477C60000000, float 0xBFE7A693C0000000, float 0xBFE41CFEA0000000, float 0x3FDF9B13E0000000, float 0x3FDF9B1380000000, float 0x3FD5C53B80000000, float 0x3FD5C53B00000000, float 0x3FC6326AC0000000, float 0x3FC63269E0000000, float 0xBEE05CEB00000000, float 0xBEE086A320000000, float 0xBFC63269E0000000, float 0xBFC6326AC0000000, float 0xBFD5C53B80000000, float 0xBFD5C53B80000000, float 0xBFDF9B13E0000000, float 0xBFDF9B1460000000, float 0xBFE41CFE80000000, float 0x3FE7A693C0000000, float 0x3FEA477C20000000, float 0x3FEBE5DC40000000, float 0x3FEC71C6E0000000, float 0x3FEBE5DC40000000, float 0x3FEA477C20000000, float 0x3FE7A693C0000000, float 0xBFE41CFE80000000>, i32 %idx
%v2 = extractelement <81 x float> <float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFEA0000000, float 0xBFE7A693C0000000, float 0x3FE7A693C0000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFEBE5DC40000000, float 0x3FEBE5DC40000000, float 0xBFEC71C720000000, float 0x3FEC71C6E0000000, float 0xBFEBE5DC60000000, float 0x3FEBE5DC40000000, float 0xBFEA477C20000000, float 0x3FEA477C20000000, float 0xBFE7A693C0000000, float 0x3FE7A69380000000, float 0xBFE41CFEA0000000, float 0xBFDF9B13E0000000, float 0xBFD5C53B80000000, float 0xBFC6326AC0000000, float 0x3EE0789320000000, float 0x3FC6326AC0000000, float 0x3FD5C53B80000000, float 0x3FDF9B13E0000000, float 0x3FE41CFE80000000>, i32 %idx
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2f32.v4f32.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2f32.v4f32.ll
index 07ca294019341..f1d147947ccdf 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2f32.v4f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2f32.v4f32.ll
@@ -138,12 +138,11 @@ define void @v_shuffle_v2f32_v4f32__3_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2f32_v4f32__3_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v4, v[3:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -273,12 +272,11 @@ define void @v_shuffle_v2f32_v4f32__7_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2f32_v4f32__7_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v4, v[3:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -316,16 +314,14 @@ define void @v_shuffle_v2f32_v4f32__7_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2f32_v4f32__7_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: global_store_dwordx2 v5, v[1:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v8, v[3:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -370,15 +366,14 @@ define void @v_shuffle_v2f32_v4f32__7_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2f32_v4f32__7_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v7, v[3:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -423,15 +418,14 @@ define void @v_shuffle_v2f32_v4f32__7_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2f32_v4f32__7_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: global_store_dwordx2 v7, v[1:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v6, v[3:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -476,15 +470,14 @@ define void @v_shuffle_v2f32_v4f32__7_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2f32_v4f32__7_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v5, v[3:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -529,13 +522,12 @@ define void @v_shuffle_v2f32_v4f32__7_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2f32_v4f32__7_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: global_store_dwordx2 v4, v[1:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx2 v5, v[3:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -575,12 +567,12 @@ define void @v_shuffle_v2f32_v4f32__7_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2f32_v4f32__7_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx2 v5, v[3:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -618,12 +610,12 @@ define void @v_shuffle_v2f32_v4f32__7_6(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2f32_v4f32__7_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx2 v4, v[1:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx2 v5, v[3:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -661,12 +653,12 @@ define void @v_shuffle_v2f32_v4f32__7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2f32_v4f32__7_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx2 v5, v[3:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -704,12 +696,11 @@ define void @v_shuffle_v2f32_v4f32__u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2f32_v4f32__u_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v0
-; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v0, v[0:1], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -872,13 +863,12 @@ define void @v_shuffle_v2f32_v4f32__3_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2f32_v4f32__3_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: global_store_dwordx2 v4, v[1:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx2 v5, v[3:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -917,12 +907,11 @@ define void @v_shuffle_v2f32_v4f32__4_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2f32_v4f32__4_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v0
-; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v0, v[0:1], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1227,12 +1216,12 @@ define void @v_shuffle_v2f32_v4f32__3_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2f32_v4f32__3_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx2 v5, v[3:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1582,12 +1571,12 @@ define void @v_shuffle_v2f32_v4f32__3_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2f32_v4f32__3_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx2 v4, v[1:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx2 v5, v[3:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1933,12 +1922,12 @@ define void @v_shuffle_v2f32_v4f32__3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2f32_v4f32__3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx2 v5, v[3:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2250,12 +2239,11 @@ define void @v_shuffle_v2f32_v4f32__3_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2f32_v4f32__3_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v4, v[3:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2592,11 +2580,11 @@ define void @v_shuffle_v2f32_v4f32__3_5(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v3
-; GFX900-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v8, v[3:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2972,11 +2960,11 @@ define void @v_shuffle_v2f32_v4f32__3_6(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v6
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: global_store_dwordx2 v8, v[5:6], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v8, v[3:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3354,11 +3342,11 @@ define void @v_shuffle_v2f32_v4f32__3_7(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, v3
-; GFX900-NEXT: global_store_dwordx2 v8, v[6:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v8, v[3:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2f32.v8f32.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2f32.v8f32.ll
index 3deb23ca5314b..c17adef30c77a 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2f32.v8f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2f32.v8f32.ll
@@ -298,12 +298,11 @@ define void @v_shuffle_v2f32_v8f32__7_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2f32_v8f32__7_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v8, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -597,12 +596,11 @@ define void @v_shuffle_v2f32_v8f32__15_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2f32_v8f32__15_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v8, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -640,16 +638,14 @@ define void @v_shuffle_v2f32_v8f32__15_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2f32_v8f32__15_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:8]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v9, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v8
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: global_store_dwordx2 v9, v[1:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v16, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -694,15 +690,14 @@ define void @v_shuffle_v2f32_v8f32__15_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2f32_v8f32__15_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v15, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[7:14]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:9]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v9
-; GFX900-NEXT: global_store_dwordx2 v10, v[0:1], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v15, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -747,15 +742,14 @@ define void @v_shuffle_v2f32_v8f32__15_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2f32_v8f32__15_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v11, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:10]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v1, v10
-; GFX900-NEXT: global_store_dwordx2 v11, v[1:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v14, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -800,15 +794,14 @@ define void @v_shuffle_v2f32_v8f32__15_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2f32_v8f32__15_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v13, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[5:12]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, v11
-; GFX900-NEXT: global_store_dwordx2 v12, v[2:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v13, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -853,15 +846,14 @@ define void @v_shuffle_v2f32_v8f32__15_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2f32_v8f32__15_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v13, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[5:12]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, v12
-; GFX900-NEXT: global_store_dwordx2 v13, v[3:4], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v12, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -906,15 +898,14 @@ define void @v_shuffle_v2f32_v8f32__15_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2f32_v8f32__15_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v11, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[3:10]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v13
-; GFX900-NEXT: global_store_dwordx2 v14, v[4:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v11, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -959,15 +950,14 @@ define void @v_shuffle_v2f32_v8f32__15_6(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2f32_v8f32__15_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v15, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[7:14]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, v14
-; GFX900-NEXT: global_store_dwordx2 v15, v[5:6], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v10, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1012,15 +1002,14 @@ define void @v_shuffle_v2f32_v8f32__15_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2f32_v8f32__15_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[1:8]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, v15
-; GFX900-NEXT: global_store_dwordx2 v16, v[6:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v9, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1065,13 +1054,12 @@ define void @v_shuffle_v2f32_v8f32__15_8(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2f32_v8f32__15_8:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: global_store_dwordx2 v8, v[1:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v0
+; GFX900-NEXT: global_store_dwordx2 v9, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1111,12 +1099,12 @@ define void @v_shuffle_v2f32_v8f32__15_9(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2f32_v8f32__15_9:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v1
+; GFX900-NEXT: global_store_dwordx2 v9, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1154,12 +1142,12 @@ define void @v_shuffle_v2f32_v8f32__15_10(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2f32_v8f32__15_10:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[1:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v2
+; GFX900-NEXT: global_store_dwordx2 v9, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1199,12 +1187,12 @@ define void @v_shuffle_v2f32_v8f32__15_11(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2f32_v8f32__15_11:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v3
+; GFX900-NEXT: global_store_dwordx2 v9, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1242,12 +1230,12 @@ define void @v_shuffle_v2f32_v8f32__15_12(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2f32_v8f32__15_12:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[3:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v4
+; GFX900-NEXT: global_store_dwordx2 v9, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1287,12 +1275,12 @@ define void @v_shuffle_v2f32_v8f32__15_13(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2f32_v8f32__15_13:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v5
+; GFX900-NEXT: global_store_dwordx2 v9, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1330,12 +1318,12 @@ define void @v_shuffle_v2f32_v8f32__15_14(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2f32_v8f32__15_14:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[5:6], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: global_store_dwordx2 v9, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1373,12 +1361,12 @@ define void @v_shuffle_v2f32_v8f32__15_15(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2f32_v8f32__15_15:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[6:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v7
+; GFX900-NEXT: global_store_dwordx2 v9, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1416,12 +1404,11 @@ define void @v_shuffle_v2f32_v8f32__u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2f32_v8f32__u_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[1:8]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v0
-; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v0, v[0:1], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1756,13 +1743,12 @@ define void @v_shuffle_v2f32_v8f32__7_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2f32_v8f32__7_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: global_store_dwordx2 v8, v[1:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v0
+; GFX900-NEXT: global_store_dwordx2 v9, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1801,12 +1787,11 @@ define void @v_shuffle_v2f32_v8f32__8_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2f32_v8f32__8_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[1:8]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v0
-; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v0, v[0:1], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2491,12 +2476,12 @@ define void @v_shuffle_v2f32_v8f32__7_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2f32_v8f32__7_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v1
+; GFX900-NEXT: global_store_dwordx2 v9, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3228,12 +3213,12 @@ define void @v_shuffle_v2f32_v8f32__7_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2f32_v8f32__7_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[1:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v2
+; GFX900-NEXT: global_store_dwordx2 v9, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3961,12 +3946,12 @@ define void @v_shuffle_v2f32_v8f32__7_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2f32_v8f32__7_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v3
+; GFX900-NEXT: global_store_dwordx2 v9, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4698,12 +4683,12 @@ define void @v_shuffle_v2f32_v8f32__7_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2f32_v8f32__7_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[3:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v4
+; GFX900-NEXT: global_store_dwordx2 v9, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5431,12 +5416,12 @@ define void @v_shuffle_v2f32_v8f32__7_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2f32_v8f32__7_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v5
+; GFX900-NEXT: global_store_dwordx2 v9, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6170,12 +6155,12 @@ define void @v_shuffle_v2f32_v8f32__7_6(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2f32_v8f32__7_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[5:6], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: global_store_dwordx2 v9, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6901,12 +6886,12 @@ define void @v_shuffle_v2f32_v8f32__7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2f32_v8f32__7_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[6:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v7
+; GFX900-NEXT: global_store_dwordx2 v9, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7590,12 +7575,11 @@ define void @v_shuffle_v2f32_v8f32__7_8(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2f32_v8f32__7_8:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v8, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -8320,11 +8304,11 @@ define void @v_shuffle_v2f32_v8f32__7_9(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v9
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, v7
-; GFX900-NEXT: global_store_dwordx2 v16, v[8:9], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v16, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -9084,11 +9068,11 @@ define void @v_shuffle_v2f32_v8f32__7_10(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v10
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v9, v7
-; GFX900-NEXT: global_store_dwordx2 v16, v[9:10], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v16, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -9852,11 +9836,11 @@ define void @v_shuffle_v2f32_v8f32__7_11(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, v7
-; GFX900-NEXT: global_store_dwordx2 v16, v[10:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v16, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -10616,11 +10600,11 @@ define void @v_shuffle_v2f32_v8f32__7_12(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v12
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v11, v7
-; GFX900-NEXT: global_store_dwordx2 v16, v[11:12], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v16, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -11384,11 +11368,11 @@ define void @v_shuffle_v2f32_v8f32__7_13(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, v7
-; GFX900-NEXT: global_store_dwordx2 v16, v[12:13], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v16, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -12148,11 +12132,11 @@ define void @v_shuffle_v2f32_v8f32__7_14(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v14
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v13, v7
-; GFX900-NEXT: global_store_dwordx2 v16, v[13:14], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v16, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -12918,11 +12902,11 @@ define void @v_shuffle_v2f32_v8f32__7_15(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, v7
-; GFX900-NEXT: global_store_dwordx2 v16, v[14:15], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v16, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i32.v4i32.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i32.v4i32.ll
index 37df1b6a72e03..39c6a447788e4 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i32.v4i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i32.v4i32.ll
@@ -138,12 +138,11 @@ define void @v_shuffle_v2i32_v4i32__3_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i32_v4i32__3_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v4, v[3:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -273,12 +272,11 @@ define void @v_shuffle_v2i32_v4i32__7_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i32_v4i32__7_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v4, v[3:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -316,16 +314,14 @@ define void @v_shuffle_v2i32_v4i32__7_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i32_v4i32__7_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: global_store_dwordx2 v5, v[1:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v8, v[3:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -370,15 +366,14 @@ define void @v_shuffle_v2i32_v4i32__7_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i32_v4i32__7_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v7, v[3:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -423,15 +418,14 @@ define void @v_shuffle_v2i32_v4i32__7_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i32_v4i32__7_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: global_store_dwordx2 v7, v[1:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v6, v[3:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -476,15 +470,14 @@ define void @v_shuffle_v2i32_v4i32__7_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i32_v4i32__7_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v5, v[3:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -529,13 +522,12 @@ define void @v_shuffle_v2i32_v4i32__7_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i32_v4i32__7_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: global_store_dwordx2 v4, v[1:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx2 v5, v[3:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -575,12 +567,12 @@ define void @v_shuffle_v2i32_v4i32__7_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i32_v4i32__7_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx2 v5, v[3:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -618,12 +610,12 @@ define void @v_shuffle_v2i32_v4i32__7_6(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i32_v4i32__7_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx2 v4, v[1:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx2 v5, v[3:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -661,12 +653,12 @@ define void @v_shuffle_v2i32_v4i32__7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i32_v4i32__7_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx2 v5, v[3:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -704,12 +696,11 @@ define void @v_shuffle_v2i32_v4i32__u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i32_v4i32__u_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v0
-; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v0, v[0:1], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -872,13 +863,12 @@ define void @v_shuffle_v2i32_v4i32__3_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i32_v4i32__3_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: global_store_dwordx2 v4, v[1:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx2 v5, v[3:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -917,12 +907,11 @@ define void @v_shuffle_v2i32_v4i32__4_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i32_v4i32__4_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v0
-; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v0, v[0:1], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1227,12 +1216,12 @@ define void @v_shuffle_v2i32_v4i32__3_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i32_v4i32__3_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx2 v5, v[3:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1582,12 +1571,12 @@ define void @v_shuffle_v2i32_v4i32__3_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i32_v4i32__3_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx2 v4, v[1:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx2 v5, v[3:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1933,12 +1922,12 @@ define void @v_shuffle_v2i32_v4i32__3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i32_v4i32__3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx2 v5, v[3:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2250,12 +2239,11 @@ define void @v_shuffle_v2i32_v4i32__3_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i32_v4i32__3_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v4, v[3:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2592,11 +2580,11 @@ define void @v_shuffle_v2i32_v4i32__3_5(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v3
-; GFX900-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v8, v[3:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2972,11 +2960,11 @@ define void @v_shuffle_v2i32_v4i32__3_6(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v6
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: global_store_dwordx2 v8, v[5:6], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v8, v[3:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3354,11 +3342,11 @@ define void @v_shuffle_v2i32_v4i32__3_7(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, v3
-; GFX900-NEXT: global_store_dwordx2 v8, v[6:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v8, v[3:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i32.v8i32.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i32.v8i32.ll
index 94ee1774c2766..0917a6ecaa5c4 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i32.v8i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i32.v8i32.ll
@@ -298,12 +298,11 @@ define void @v_shuffle_v2i32_v8i32__7_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i32_v8i32__7_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v8, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -597,12 +596,11 @@ define void @v_shuffle_v2i32_v8i32__15_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i32_v8i32__15_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v8, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -640,16 +638,14 @@ define void @v_shuffle_v2i32_v8i32__15_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i32_v8i32__15_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:8]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v9, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v8
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: global_store_dwordx2 v9, v[1:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v16, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -694,15 +690,14 @@ define void @v_shuffle_v2i32_v8i32__15_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i32_v8i32__15_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v15, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[7:14]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:9]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v9
-; GFX900-NEXT: global_store_dwordx2 v10, v[0:1], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v15, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -747,15 +742,14 @@ define void @v_shuffle_v2i32_v8i32__15_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i32_v8i32__15_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v11, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:10]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v1, v10
-; GFX900-NEXT: global_store_dwordx2 v11, v[1:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v14, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -800,15 +794,14 @@ define void @v_shuffle_v2i32_v8i32__15_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i32_v8i32__15_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v13, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[5:12]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, v11
-; GFX900-NEXT: global_store_dwordx2 v12, v[2:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v13, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -853,15 +846,14 @@ define void @v_shuffle_v2i32_v8i32__15_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i32_v8i32__15_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v13, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[5:12]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, v12
-; GFX900-NEXT: global_store_dwordx2 v13, v[3:4], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v12, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -906,15 +898,14 @@ define void @v_shuffle_v2i32_v8i32__15_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i32_v8i32__15_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v11, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[3:10]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v13
-; GFX900-NEXT: global_store_dwordx2 v14, v[4:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v11, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -959,15 +950,14 @@ define void @v_shuffle_v2i32_v8i32__15_6(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i32_v8i32__15_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v15, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[7:14]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, v14
-; GFX900-NEXT: global_store_dwordx2 v15, v[5:6], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v10, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1012,15 +1002,14 @@ define void @v_shuffle_v2i32_v8i32__15_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i32_v8i32__15_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[1:8]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, v15
-; GFX900-NEXT: global_store_dwordx2 v16, v[6:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v9, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1065,13 +1054,12 @@ define void @v_shuffle_v2i32_v8i32__15_8(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i32_v8i32__15_8:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: global_store_dwordx2 v8, v[1:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v0
+; GFX900-NEXT: global_store_dwordx2 v9, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1111,12 +1099,12 @@ define void @v_shuffle_v2i32_v8i32__15_9(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i32_v8i32__15_9:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v1
+; GFX900-NEXT: global_store_dwordx2 v9, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1154,12 +1142,12 @@ define void @v_shuffle_v2i32_v8i32__15_10(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i32_v8i32__15_10:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[1:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v2
+; GFX900-NEXT: global_store_dwordx2 v9, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1199,12 +1187,12 @@ define void @v_shuffle_v2i32_v8i32__15_11(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i32_v8i32__15_11:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v3
+; GFX900-NEXT: global_store_dwordx2 v9, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1242,12 +1230,12 @@ define void @v_shuffle_v2i32_v8i32__15_12(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i32_v8i32__15_12:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[3:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v4
+; GFX900-NEXT: global_store_dwordx2 v9, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1287,12 +1275,12 @@ define void @v_shuffle_v2i32_v8i32__15_13(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i32_v8i32__15_13:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v5
+; GFX900-NEXT: global_store_dwordx2 v9, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1330,12 +1318,12 @@ define void @v_shuffle_v2i32_v8i32__15_14(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i32_v8i32__15_14:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[5:6], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: global_store_dwordx2 v9, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1373,12 +1361,12 @@ define void @v_shuffle_v2i32_v8i32__15_15(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i32_v8i32__15_15:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[6:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v7
+; GFX900-NEXT: global_store_dwordx2 v9, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1416,12 +1404,11 @@ define void @v_shuffle_v2i32_v8i32__u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i32_v8i32__u_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[1:8]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v0
-; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v0, v[0:1], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1756,13 +1743,12 @@ define void @v_shuffle_v2i32_v8i32__7_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i32_v8i32__7_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: global_store_dwordx2 v8, v[1:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v0
+; GFX900-NEXT: global_store_dwordx2 v9, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1801,12 +1787,11 @@ define void @v_shuffle_v2i32_v8i32__8_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i32_v8i32__8_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[1:8]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v0
-; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v0, v[0:1], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2491,12 +2476,12 @@ define void @v_shuffle_v2i32_v8i32__7_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i32_v8i32__7_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v1
+; GFX900-NEXT: global_store_dwordx2 v9, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3228,12 +3213,12 @@ define void @v_shuffle_v2i32_v8i32__7_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i32_v8i32__7_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[1:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v2
+; GFX900-NEXT: global_store_dwordx2 v9, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3961,12 +3946,12 @@ define void @v_shuffle_v2i32_v8i32__7_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i32_v8i32__7_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v3
+; GFX900-NEXT: global_store_dwordx2 v9, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4698,12 +4683,12 @@ define void @v_shuffle_v2i32_v8i32__7_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i32_v8i32__7_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[3:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v4
+; GFX900-NEXT: global_store_dwordx2 v9, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5431,12 +5416,12 @@ define void @v_shuffle_v2i32_v8i32__7_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i32_v8i32__7_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v5
+; GFX900-NEXT: global_store_dwordx2 v9, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6170,12 +6155,12 @@ define void @v_shuffle_v2i32_v8i32__7_6(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i32_v8i32__7_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[5:6], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: global_store_dwordx2 v9, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6901,12 +6886,12 @@ define void @v_shuffle_v2i32_v8i32__7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i32_v8i32__7_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[6:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v7
+; GFX900-NEXT: global_store_dwordx2 v9, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7590,12 +7575,11 @@ define void @v_shuffle_v2i32_v8i32__7_8(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i32_v8i32__7_8:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v8, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -8320,11 +8304,11 @@ define void @v_shuffle_v2i32_v8i32__7_9(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v9
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, v7
-; GFX900-NEXT: global_store_dwordx2 v16, v[8:9], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v16, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -9084,11 +9068,11 @@ define void @v_shuffle_v2i32_v8i32__7_10(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v10
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v9, v7
-; GFX900-NEXT: global_store_dwordx2 v16, v[9:10], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v16, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -9852,11 +9836,11 @@ define void @v_shuffle_v2i32_v8i32__7_11(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, v7
-; GFX900-NEXT: global_store_dwordx2 v16, v[10:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v16, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -10616,11 +10600,11 @@ define void @v_shuffle_v2i32_v8i32__7_12(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v12
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v11, v7
-; GFX900-NEXT: global_store_dwordx2 v16, v[11:12], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v16, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -11384,11 +11368,11 @@ define void @v_shuffle_v2i32_v8i32__7_13(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, v7
-; GFX900-NEXT: global_store_dwordx2 v16, v[12:13], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v16, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -12148,11 +12132,11 @@ define void @v_shuffle_v2i32_v8i32__7_14(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v14
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v13, v7
-; GFX900-NEXT: global_store_dwordx2 v16, v[13:14], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v16, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -12918,11 +12902,11 @@ define void @v_shuffle_v2i32_v8i32__7_15(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, v7
-; GFX900-NEXT: global_store_dwordx2 v16, v[14:15], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v16, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v2i64.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v2i64.ll
index 0b20caea9cd95..1df6f21f15594 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v2i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v2i64.ll
@@ -58,39 +58,33 @@ define void @v_shuffle_v2i64_v2i64__1_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i64_v2i64__1_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i64_v2i64__1_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v2i64__1_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -114,39 +108,33 @@ define void @v_shuffle_v2i64_v2i64__3_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i64_v2i64__3_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i64_v2i64__3_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v2i64__3_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -160,55 +148,42 @@ define void @v_shuffle_v2i64_v2i64__3_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i64_v2i64__3_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i64_v2i64__3_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v2i64__3_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -222,49 +197,43 @@ define void @v_shuffle_v2i64_v2i64__3_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i64_v2i64__3_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i64_v2i64__3_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v2i64__3_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -291,31 +260,27 @@ define void @v_shuffle_v2i64_v2i64__3_2(ptr addrspace(1) inreg %ptr) {
; GFX90A-LABEL: v_shuffle_v2i64_v2i64__3_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v2i64__3_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -329,39 +294,40 @@ define void @v_shuffle_v2i64_v2i64__3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i64_v2i64__3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i64_v2i64__3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v2i64__3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -375,39 +341,33 @@ define void @v_shuffle_v2i64_v2i64__u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i64_v2i64__u_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i64_v2i64__u_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v2i64__u_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v0, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -478,31 +438,27 @@ define void @v_shuffle_v2i64_v2i64__1_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-LABEL: v_shuffle_v2i64_v2i64__1_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v2i64__1_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -515,39 +471,33 @@ define void @v_shuffle_v2i64_v2i64__2_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i64_v2i64__2_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i64_v2i64__2_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v2i64__2_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v0, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -638,39 +588,40 @@ define void @v_shuffle_v2i64_v2i64__1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i64_v2i64__1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i64_v2i64__1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v2i64__1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -772,39 +723,33 @@ define void @v_shuffle_v2i64_v2i64__1_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i64_v2i64__1_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i64_v2i64__1_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v2i64__1_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -928,12 +873,12 @@ define void @v_shuffle_v2i64_v2i64__1_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v6
+; GFX900-NEXT: v_mov_b32_e32 v5, v7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v2
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -944,12 +889,12 @@ define void @v_shuffle_v2i64_v2i64__1_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v6
+; GFX90A-NEXT: v_mov_b32_e32 v5, v7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v3
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -960,13 +905,12 @@ define void @v_shuffle_v2i64_v2i64__1_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v6
+; GFX942-NEXT: v_mov_b32_e32 v5, v7
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v3
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v3i64.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v3i64.ll
index bc8a56a30d8f9..13b16f778aa97 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v3i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v3i64.ll
@@ -97,39 +97,33 @@ define void @v_shuffle_v2i64_v3i64__2_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i64_v3i64__2_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i64_v3i64__2_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v3i64__2_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -193,39 +187,33 @@ define void @v_shuffle_v2i64_v3i64__5_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i64_v3i64__5_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i64_v3i64__5_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v3i64__5_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -239,55 +227,42 @@ define void @v_shuffle_v2i64_v3i64__5_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i64_v3i64__5_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:7]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i64_v3i64__5_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:7]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v3i64__5_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:7]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -301,49 +276,43 @@ define void @v_shuffle_v2i64_v3i64__5_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i64_v3i64__5_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
-; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v8
-; GFX900-NEXT: v_mov_b32_e32 v1, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i64_v3i64__5_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
-; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v8
-; GFX90A-NEXT: v_mov_b32_e32 v1, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v3i64__5_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
-; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v8
-; GFX942-NEXT: v_mov_b32_e32 v1, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -357,49 +326,43 @@ define void @v_shuffle_v2i64_v3i64__5_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i64_v3i64__5_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[2:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:11]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, v10
-; GFX900-NEXT: v_mov_b32_e32 v3, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i64_v3i64__5_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[2:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:11]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, v10
-; GFX90A-NEXT: v_mov_b32_e32 v3, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v3i64__5_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:11]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v10
-; GFX942-NEXT: v_mov_b32_e32 v3, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -413,45 +376,40 @@ define void @v_shuffle_v2i64_v3i64__5_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i64_v3i64__5_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: v_mov_b32_e32 v7, v1
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i64_v3i64__5_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v0
+; GFX90A-NEXT: v_mov_b32_e32 v7, v1
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v3i64__5_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v0
+; GFX942-NEXT: v_mov_b32_e32 v7, v1
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -465,39 +423,40 @@ define void @v_shuffle_v2i64_v3i64__5_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i64_v3i64__5_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: v_mov_b32_e32 v7, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i64_v3i64__5_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v2
+; GFX90A-NEXT: v_mov_b32_e32 v7, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v3i64__5_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v2
+; GFX942-NEXT: v_mov_b32_e32 v7, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -511,39 +470,40 @@ define void @v_shuffle_v2i64_v3i64__5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i64_v3i64__5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i64_v3i64__5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v3i64__5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -557,39 +517,33 @@ define void @v_shuffle_v2i64_v3i64__u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i64_v3i64__u_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[2:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i64_v3i64__u_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[2:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v3i64__u_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v0, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -692,45 +646,40 @@ define void @v_shuffle_v2i64_v3i64__2_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i64_v3i64__2_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: v_mov_b32_e32 v7, v1
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i64_v3i64__2_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v0
+; GFX90A-NEXT: v_mov_b32_e32 v7, v1
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v3i64__2_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v0
+; GFX942-NEXT: v_mov_b32_e32 v7, v1
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -743,39 +692,33 @@ define void @v_shuffle_v2i64_v3i64__3_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i64_v3i64__3_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[2:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i64_v3i64__3_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[2:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v3i64__3_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v0, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -967,39 +910,40 @@ define void @v_shuffle_v2i64_v3i64__2_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i64_v3i64__2_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: v_mov_b32_e32 v7, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i64_v3i64__2_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v2
+; GFX90A-NEXT: v_mov_b32_e32 v7, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v3i64__2_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v2
+; GFX942-NEXT: v_mov_b32_e32 v7, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -1230,39 +1174,40 @@ define void @v_shuffle_v2i64_v3i64__2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i64_v3i64__2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i64_v3i64__2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v3i64__2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -1459,39 +1404,33 @@ define void @v_shuffle_v2i64_v3i64__2_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i64_v3i64__2_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i64_v3i64__2_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v3i64__2_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -1717,12 +1656,12 @@ define void @v_shuffle_v2i64_v3i64__2_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v8
+; GFX900-NEXT: v_mov_b32_e32 v7, v9
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, v4
-; GFX900-NEXT: v_mov_b32_e32 v7, v5
-; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1733,12 +1672,12 @@ define void @v_shuffle_v2i64_v3i64__2_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, v8
+; GFX90A-NEXT: v_mov_b32_e32 v7, v9
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v5
-; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1749,13 +1688,12 @@ define void @v_shuffle_v2i64_v3i64__2_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v8
+; GFX942-NEXT: v_mov_b32_e32 v7, v9
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v5
-; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -2011,12 +1949,12 @@ define void @v_shuffle_v2i64_v3i64__2_5(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v10
+; GFX900-NEXT: v_mov_b32_e32 v7, v11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, v4
-; GFX900-NEXT: v_mov_b32_e32 v9, v5
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2027,12 +1965,12 @@ define void @v_shuffle_v2i64_v3i64__2_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, v10
+; GFX90A-NEXT: v_mov_b32_e32 v7, v11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, v4
-; GFX90A-NEXT: v_mov_b32_e32 v9, v5
-; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2043,13 +1981,12 @@ define void @v_shuffle_v2i64_v3i64__2_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v10
+; GFX942-NEXT: v_mov_b32_e32 v7, v11
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v4
-; GFX942-NEXT: v_mov_b32_e32 v9, v5
-; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -2271,10 +2208,9 @@ define void @s_shuffle_v2i64_v3i64__2_u() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -2372,10 +2308,9 @@ define void @s_shuffle_v2i64_v3i64__5_u() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -2392,15 +2327,13 @@ define void @s_shuffle_v2i64_v3i64__5_0() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
; GFX900-NEXT: ;;#ASMEND
@@ -2410,15 +2343,13 @@ define void @s_shuffle_v2i64_v3i64__5_0() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
; GFX90A-NEXT: ;;#ASMEND
@@ -2481,11 +2412,11 @@ define void @s_shuffle_v2i64_v3i64__5_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -2536,13 +2467,11 @@ define void @s_shuffle_v2i64_v3i64__5_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s10, s12
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b32 s11, s13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -2555,46 +2484,18 @@ define void @s_shuffle_v2i64_v3i64__5_2() {
}
define void @s_shuffle_v2i64_v3i64__5_3() {
-; GFX900-LABEL: s_shuffle_v2i64_v3i64__5_3:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v2i64_v3i64__5_3:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v2i64_v3i64__5_3:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v2i64_v3i64__5_3:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s4
+; GFX9-NEXT: s_mov_b32 s11, s5
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <2 x i32> <i32 5, i32 3>
@@ -2607,10 +2508,10 @@ define void @s_shuffle_v2i64_v3i64__5_4() {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
+; GFX9-NEXT: ; def s[4:9]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
+; GFX9-NEXT: s_mov_b32 s10, s6
+; GFX9-NEXT: s_mov_b32 s11, s7
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:11]
; GFX9-NEXT: ;;#ASMEND
@@ -2623,50 +2524,18 @@ define void @s_shuffle_v2i64_v3i64__5_4() {
}
define void @s_shuffle_v2i64_v3i64__5_5() {
-; GFX900-LABEL: s_shuffle_v2i64_v3i64__5_5:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v2i64_v3i64__5_5:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v2i64_v3i64__5_5:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v2i64_v3i64__5_5:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <2 x i32> <i32 5, i32 5>
@@ -2790,46 +2659,18 @@ define void @s_shuffle_v2i64_v3i64__1_0() {
}
define void @s_shuffle_v2i64_v3i64__2_0() {
-; GFX900-LABEL: s_shuffle_v2i64_v3i64__2_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v2i64_v3i64__2_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v2i64_v3i64__2_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v2i64_v3i64__2_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s4
+; GFX9-NEXT: s_mov_b32 s11, s5
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <2 x i32> <i32 2, i32 0>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -3046,10 +2887,10 @@ define void @s_shuffle_v2i64_v3i64__2_1() {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
+; GFX9-NEXT: ; def s[4:9]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
+; GFX9-NEXT: s_mov_b32 s10, s6
+; GFX9-NEXT: s_mov_b32 s11, s7
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:11]
; GFX9-NEXT: ;;#ASMEND
@@ -3271,50 +3112,18 @@ define void @s_shuffle_v2i64_v3i64__1_2() {
}
define void @s_shuffle_v2i64_v3i64__2_2() {
-; GFX900-LABEL: s_shuffle_v2i64_v3i64__2_2:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v2i64_v3i64__2_2:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v2i64_v3i64__2_2:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v2i64_v3i64__2_2:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <2 x i32> <i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -3553,10 +3362,9 @@ define void @s_shuffle_v2i64_v3i64__2_3() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -3789,13 +3597,13 @@ define void @s_shuffle_v2i64_v3i64__2_4() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
; GFX900-NEXT: ;;#ASMEND
@@ -3805,13 +3613,13 @@ define void @s_shuffle_v2i64_v3i64__2_4() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
; GFX90A-NEXT: ;;#ASMEND
@@ -3821,13 +3629,14 @@ define void @s_shuffle_v2i64_v3i64__2_4() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b32 s10, s2
+; GFX942-NEXT: s_mov_b32 s11, s3
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -4099,15 +3908,14 @@ define void @s_shuffle_v2i64_v3i64__2_5() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b32 s10, s4
+; GFX942-NEXT: s_mov_b32 s11, s5
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v4i64.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v4i64.ll
index dd42a1dd44320..e756a7ae1682d 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v4i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v4i64.ll
@@ -136,39 +136,33 @@ define void @v_shuffle_v2i64_v4i64__3_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i64_v4i64__3_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i64_v4i64__3_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v4i64__3_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -272,39 +266,33 @@ define void @v_shuffle_v2i64_v4i64__7_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i64_v4i64__7_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i64_v4i64__7_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v4i64__7_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -318,55 +306,42 @@ define void @v_shuffle_v2i64_v4i64__7_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i64_v4i64__7_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v8, v6
-; GFX900-NEXT: v_mov_b32_e32 v9, v7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, v0
-; GFX900-NEXT: v_mov_b32_e32 v11, v1
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i64_v4i64__7_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:9]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v8
-; GFX90A-NEXT: v_mov_b32_e32 v3, v9
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v4i64__7_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:9]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v8
-; GFX942-NEXT: v_mov_b32_e32 v3, v9
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -380,49 +355,43 @@ define void @v_shuffle_v2i64_v4i64__7_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i64_v4i64__7_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v10
-; GFX900-NEXT: v_mov_b32_e32 v1, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i64_v4i64__7_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v10
-; GFX90A-NEXT: v_mov_b32_e32 v1, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v4i64__7_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v10
-; GFX942-NEXT: v_mov_b32_e32 v1, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -436,49 +405,43 @@ define void @v_shuffle_v2i64_v4i64__7_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i64_v4i64__7_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, v12
-; GFX900-NEXT: v_mov_b32_e32 v3, v13
-; GFX900-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i64_v4i64__7_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, v12
-; GFX90A-NEXT: v_mov_b32_e32 v3, v13
-; GFX90A-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v4i64__7_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v12
-; GFX942-NEXT: v_mov_b32_e32 v3, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -492,49 +455,43 @@ define void @v_shuffle_v2i64_v4i64__7_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i64_v4i64__7_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v14
-; GFX900-NEXT: v_mov_b32_e32 v5, v15
-; GFX900-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i64_v4i64__7_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v14
-; GFX90A-NEXT: v_mov_b32_e32 v5, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v4i64__7_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v14
-; GFX942-NEXT: v_mov_b32_e32 v5, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -548,45 +505,40 @@ define void @v_shuffle_v2i64_v4i64__7_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i64_v4i64__7_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v0
+; GFX900-NEXT: v_mov_b32_e32 v9, v1
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i64_v4i64__7_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v0
+; GFX90A-NEXT: v_mov_b32_e32 v9, v1
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v4i64__7_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v0
+; GFX942-NEXT: v_mov_b32_e32 v9, v1
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -600,39 +552,40 @@ define void @v_shuffle_v2i64_v4i64__7_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i64_v4i64__7_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v2
+; GFX900-NEXT: v_mov_b32_e32 v9, v3
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i64_v4i64__7_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v2
+; GFX90A-NEXT: v_mov_b32_e32 v9, v3
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v4i64__7_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v2
+; GFX942-NEXT: v_mov_b32_e32 v9, v3
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -646,39 +599,40 @@ define void @v_shuffle_v2i64_v4i64__7_6(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i64_v4i64__7_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v4
+; GFX900-NEXT: v_mov_b32_e32 v9, v5
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i64_v4i64__7_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v4
+; GFX90A-NEXT: v_mov_b32_e32 v9, v5
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v4i64__7_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v4
+; GFX942-NEXT: v_mov_b32_e32 v9, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -692,39 +646,40 @@ define void @v_shuffle_v2i64_v4i64__7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i64_v4i64__7_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i64_v4i64__7_7:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v4i64__7_7:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -738,39 +693,33 @@ define void @v_shuffle_v2i64_v4i64__u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i64_v4i64__u_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i64_v4i64__u_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v4i64__u_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v0, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -918,45 +867,40 @@ define void @v_shuffle_v2i64_v4i64__3_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i64_v4i64__3_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v0
+; GFX900-NEXT: v_mov_b32_e32 v9, v1
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i64_v4i64__3_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v0
+; GFX90A-NEXT: v_mov_b32_e32 v9, v1
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v4i64__3_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v0
+; GFX942-NEXT: v_mov_b32_e32 v9, v1
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -969,39 +913,33 @@ define void @v_shuffle_v2i64_v4i64__4_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i64_v4i64__4_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i64_v4i64__4_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v4i64__4_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v0, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -1294,39 +1232,40 @@ define void @v_shuffle_v2i64_v4i64__3_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i64_v4i64__3_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v2
+; GFX900-NEXT: v_mov_b32_e32 v9, v3
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i64_v4i64__3_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v2
+; GFX90A-NEXT: v_mov_b32_e32 v9, v3
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v4i64__3_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v2
+; GFX942-NEXT: v_mov_b32_e32 v9, v3
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -1658,39 +1597,40 @@ define void @v_shuffle_v2i64_v4i64__3_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i64_v4i64__3_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v4
+; GFX900-NEXT: v_mov_b32_e32 v9, v5
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i64_v4i64__3_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v4
+; GFX90A-NEXT: v_mov_b32_e32 v9, v5
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v4i64__3_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v4
+; GFX942-NEXT: v_mov_b32_e32 v9, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -2022,39 +1962,40 @@ define void @v_shuffle_v2i64_v4i64__3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i64_v4i64__3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i64_v4i64__3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v4i64__3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -2346,39 +2287,33 @@ define void @v_shuffle_v2i64_v4i64__3_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i64_v4i64__3_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i64_v4i64__3_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v4i64__3_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -2706,12 +2641,12 @@ define void @v_shuffle_v2i64_v4i64__3_5(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v10
+; GFX900-NEXT: v_mov_b32_e32 v9, v11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, v6
-; GFX900-NEXT: v_mov_b32_e32 v9, v7
-; GFX900-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2722,12 +2657,12 @@ define void @v_shuffle_v2i64_v4i64__3_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v16, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v10
+; GFX90A-NEXT: v_mov_b32_e32 v9, v11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, v6
-; GFX90A-NEXT: v_mov_b32_e32 v9, v7
-; GFX90A-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2738,13 +2673,12 @@ define void @v_shuffle_v2i64_v4i64__3_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v10
+; GFX942-NEXT: v_mov_b32_e32 v9, v11
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v6
-; GFX942-NEXT: v_mov_b32_e32 v9, v7
-; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -3102,12 +3036,12 @@ define void @v_shuffle_v2i64_v4i64__3_6(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v12
+; GFX900-NEXT: v_mov_b32_e32 v9, v13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, v6
-; GFX900-NEXT: v_mov_b32_e32 v11, v7
-; GFX900-NEXT: global_store_dwordx4 v16, v[10:13], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3118,12 +3052,12 @@ define void @v_shuffle_v2i64_v4i64__3_6(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v16, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v12
+; GFX90A-NEXT: v_mov_b32_e32 v9, v13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, v6
-; GFX90A-NEXT: v_mov_b32_e32 v11, v7
-; GFX90A-NEXT: global_store_dwordx4 v16, v[10:13], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3134,13 +3068,12 @@ define void @v_shuffle_v2i64_v4i64__3_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v12
+; GFX942-NEXT: v_mov_b32_e32 v9, v13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v6
-; GFX942-NEXT: v_mov_b32_e32 v11, v7
-; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -3498,12 +3431,12 @@ define void @v_shuffle_v2i64_v4i64__3_7(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v14
+; GFX900-NEXT: v_mov_b32_e32 v9, v15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, v6
-; GFX900-NEXT: v_mov_b32_e32 v13, v7
-; GFX900-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3514,12 +3447,12 @@ define void @v_shuffle_v2i64_v4i64__3_7(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v16, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v14
+; GFX90A-NEXT: v_mov_b32_e32 v9, v15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, v6
-; GFX90A-NEXT: v_mov_b32_e32 v13, v7
-; GFX90A-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3530,13 +3463,12 @@ define void @v_shuffle_v2i64_v4i64__3_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v14
+; GFX942-NEXT: v_mov_b32_e32 v9, v15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v6
-; GFX942-NEXT: v_mov_b32_e32 v13, v7
-; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v8i64.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v8i64.ll
index 7ee7c83e0122d..1c2215d39dc02 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v8i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v8i64.ll
@@ -292,39 +292,33 @@ define void @v_shuffle_v2i64_v8i64__7_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i64_v8i64__7_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v14
-; GFX900-NEXT: v_mov_b32_e32 v1, v15
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-NEXT: global_store_dwordx4 v2, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v16, v[14:17], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i64_v8i64__7_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v14
-; GFX90A-NEXT: v_mov_b32_e32 v1, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v16, v[14:17], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v8i64__7_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v14
-; GFX942-NEXT: v_mov_b32_e32 v1, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v16, v[14:17], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=v"()
@@ -588,39 +582,33 @@ define void @v_shuffle_v2i64_v8i64__15_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i64_v8i64__15_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v14
-; GFX900-NEXT: v_mov_b32_e32 v1, v15
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-NEXT: global_store_dwordx4 v2, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v16, v[14:17], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i64_v8i64__15_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v14
-; GFX90A-NEXT: v_mov_b32_e32 v1, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v16, v[14:17], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v8i64__15_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v14
-; GFX942-NEXT: v_mov_b32_e32 v1, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v16, v[14:17], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=v"()
@@ -637,52 +625,39 @@ define void @v_shuffle_v2i64_v8i64__15_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, v14
-; GFX900-NEXT: v_mov_b32_e32 v17, v15
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:15]
+; GFX900-NEXT: ; def v[16:31]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v18, v0
-; GFX900-NEXT: v_mov_b32_e32 v19, v1
-; GFX900-NEXT: v_mov_b32_e32 v0, 0
-; GFX900-NEXT: global_store_dwordx4 v0, v[16:19], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v0, v[14:17], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i64_v8i64__15_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v32, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:15]
+; GFX90A-NEXT: ; def v[16:31]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:17]
+; GFX90A-NEXT: ; def v[0:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v18, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v16
-; GFX90A-NEXT: v_mov_b32_e32 v3, v17
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v18, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v32, v[14:17], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v8i64__15_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v32, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:15]
+; GFX942-NEXT: ; def v[16:31]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v18, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:17]
+; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v16
-; GFX942-NEXT: v_mov_b32_e32 v3, v17
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v18, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v32, v[14:17], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=v"()
@@ -697,48 +672,42 @@ define void @v_shuffle_v2i64_v8i64__15_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:15]
+; GFX900-NEXT: ; def v[14:29]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:19]
+; GFX900-NEXT: ; def v[0:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v18
-; GFX900-NEXT: v_mov_b32_e32 v1, v19
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
+; GFX900-NEXT: global_store_dwordx4 v0, v[14:17], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i64_v8i64__15_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v30, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:15]
+; GFX90A-NEXT: ; def v[14:29]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v20, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:19]
+; GFX90A-NEXT: ; def v[0:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v18
-; GFX90A-NEXT: v_mov_b32_e32 v1, v19
-; GFX90A-NEXT: global_store_dwordx4 v20, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v30, v[14:17], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v8i64__15_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v30, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:15]
+; GFX942-NEXT: ; def v[14:29]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v20, 0
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:19]
+; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v18
-; GFX942-NEXT: v_mov_b32_e32 v1, v19
-; GFX942-NEXT: global_store_dwordx4 v20, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v30, v[14:17], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=v"()
@@ -753,48 +722,42 @@ define void @v_shuffle_v2i64_v8i64__15_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:15]
+; GFX900-NEXT: ; def v[12:27]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:21]
+; GFX900-NEXT: ; def v[0:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, v20
-; GFX900-NEXT: v_mov_b32_e32 v3, v21
; GFX900-NEXT: v_mov_b32_e32 v0, 0
-; GFX900-NEXT: global_store_dwordx4 v0, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v0, v[14:17], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i64_v8i64__15_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v28, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:15]
+; GFX90A-NEXT: ; def v[12:27]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v22, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:21]
+; GFX90A-NEXT: ; def v[0:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, v20
-; GFX90A-NEXT: v_mov_b32_e32 v3, v21
-; GFX90A-NEXT: global_store_dwordx4 v22, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v28, v[14:17], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v8i64__15_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v28, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:15]
+; GFX942-NEXT: ; def v[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v22, 0
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:21]
+; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v20
-; GFX942-NEXT: v_mov_b32_e32 v3, v21
-; GFX942-NEXT: global_store_dwordx4 v22, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v28, v[14:17], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=v"()
@@ -809,48 +772,42 @@ define void @v_shuffle_v2i64_v8i64__15_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:15]
+; GFX900-NEXT: ; def v[10:25]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:23]
+; GFX900-NEXT: ; def v[0:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v22
-; GFX900-NEXT: v_mov_b32_e32 v5, v23
; GFX900-NEXT: v_mov_b32_e32 v0, 0
-; GFX900-NEXT: global_store_dwordx4 v0, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v0, v[14:17], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i64_v8i64__15_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v26, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:15]
+; GFX90A-NEXT: ; def v[10:25]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v24, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:23]
+; GFX90A-NEXT: ; def v[0:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v22
-; GFX90A-NEXT: v_mov_b32_e32 v5, v23
-; GFX90A-NEXT: global_store_dwordx4 v24, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v26, v[14:17], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v8i64__15_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v26, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:15]
+; GFX942-NEXT: ; def v[10:25]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v24, 0
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:23]
+; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v22
-; GFX942-NEXT: v_mov_b32_e32 v5, v23
-; GFX942-NEXT: global_store_dwordx4 v24, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v26, v[14:17], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=v"()
@@ -865,48 +822,42 @@ define void @v_shuffle_v2i64_v8i64__15_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:15]
+; GFX900-NEXT: ; def v[8:23]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[10:25]
+; GFX900-NEXT: ; def v[0:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, v24
-; GFX900-NEXT: v_mov_b32_e32 v7, v25
; GFX900-NEXT: v_mov_b32_e32 v0, 0
-; GFX900-NEXT: global_store_dwordx4 v0, v[6:9], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v0, v[14:17], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i64_v8i64__15_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v24, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:15]
+; GFX90A-NEXT: ; def v[8:23]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v26, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[10:25]
+; GFX90A-NEXT: ; def v[0:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v24
-; GFX90A-NEXT: v_mov_b32_e32 v7, v25
-; GFX90A-NEXT: global_store_dwordx4 v26, v[6:9], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v24, v[14:17], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v8i64__15_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v24, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:15]
+; GFX942-NEXT: ; def v[8:23]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v26, 0
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[10:25]
+; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v24
-; GFX942-NEXT: v_mov_b32_e32 v7, v25
-; GFX942-NEXT: global_store_dwordx4 v26, v[6:9], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v24, v[14:17], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=v"()
@@ -921,48 +872,42 @@ define void @v_shuffle_v2i64_v8i64__15_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:15]
+; GFX900-NEXT: ; def v[6:21]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[12:27]
+; GFX900-NEXT: ; def v[0:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, v26
-; GFX900-NEXT: v_mov_b32_e32 v9, v27
; GFX900-NEXT: v_mov_b32_e32 v0, 0
-; GFX900-NEXT: global_store_dwordx4 v0, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v0, v[14:17], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i64_v8i64__15_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v22, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:15]
+; GFX90A-NEXT: ; def v[6:21]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v28, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[12:27]
+; GFX90A-NEXT: ; def v[0:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, v26
-; GFX90A-NEXT: v_mov_b32_e32 v9, v27
-; GFX90A-NEXT: global_store_dwordx4 v28, v[8:11], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v22, v[14:17], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v8i64__15_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v22, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:15]
+; GFX942-NEXT: ; def v[6:21]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v28, 0
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[12:27]
+; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v26
-; GFX942-NEXT: v_mov_b32_e32 v9, v27
-; GFX942-NEXT: global_store_dwordx4 v28, v[8:11], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v22, v[14:17], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=v"()
@@ -977,48 +922,42 @@ define void @v_shuffle_v2i64_v8i64__15_6(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:15]
+; GFX900-NEXT: ; def v[4:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[14:29]
+; GFX900-NEXT: ; def v[0:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, v28
-; GFX900-NEXT: v_mov_b32_e32 v11, v29
; GFX900-NEXT: v_mov_b32_e32 v0, 0
-; GFX900-NEXT: global_store_dwordx4 v0, v[10:13], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v0, v[14:17], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i64_v8i64__15_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v20, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:15]
+; GFX90A-NEXT: ; def v[4:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v30, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[14:29]
+; GFX90A-NEXT: ; def v[0:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, v28
-; GFX90A-NEXT: v_mov_b32_e32 v11, v29
-; GFX90A-NEXT: global_store_dwordx4 v30, v[10:13], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v20, v[14:17], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v8i64__15_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v20, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:15]
+; GFX942-NEXT: ; def v[4:19]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v30, 0
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[14:29]
+; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v28
-; GFX942-NEXT: v_mov_b32_e32 v11, v29
-; GFX942-NEXT: global_store_dwordx4 v30, v[10:13], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v20, v[14:17], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=v"()
@@ -1033,48 +972,42 @@ define void @v_shuffle_v2i64_v8i64__15_7(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:15]
+; GFX900-NEXT: ; def v[2:17]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[16:31]
+; GFX900-NEXT: ; def v[0:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, v30
-; GFX900-NEXT: v_mov_b32_e32 v13, v31
; GFX900-NEXT: v_mov_b32_e32 v0, 0
-; GFX900-NEXT: global_store_dwordx4 v0, v[12:15], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v0, v[14:17], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i64_v8i64__15_7:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v18, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:15]
+; GFX90A-NEXT: ; def v[2:17]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v32, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[16:31]
+; GFX90A-NEXT: ; def v[0:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, v30
-; GFX90A-NEXT: v_mov_b32_e32 v13, v31
-; GFX90A-NEXT: global_store_dwordx4 v32, v[12:15], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v18, v[14:17], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v8i64__15_7:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v18, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:15]
+; GFX942-NEXT: ; def v[2:17]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v32, 0
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[16:31]
+; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v30
-; GFX942-NEXT: v_mov_b32_e32 v13, v31
-; GFX942-NEXT: global_store_dwordx4 v32, v[12:15], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v18, v[14:17], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=v"()
@@ -1091,42 +1024,37 @@ define void @v_shuffle_v2i64_v8i64__15_8(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, v14
-; GFX900-NEXT: v_mov_b32_e32 v3, v15
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: v_mov_b32_e32 v16, v0
+; GFX900-NEXT: v_mov_b32_e32 v17, v1
; GFX900-NEXT: v_mov_b32_e32 v0, 0
-; GFX900-NEXT: global_store_dwordx4 v0, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v0, v[14:17], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i64_v8i64__15_8:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v18, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v14
-; GFX90A-NEXT: v_mov_b32_e32 v3, v15
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v16, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v16, v0
+; GFX90A-NEXT: v_mov_b32_e32 v17, v1
+; GFX90A-NEXT: global_store_dwordx4 v18, v[14:17], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v8i64__15_8:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v18, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v14
-; GFX942-NEXT: v_mov_b32_e32 v3, v15
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v16, v0
+; GFX942-NEXT: v_mov_b32_e32 v17, v1
+; GFX942-NEXT: global_store_dwordx4 v18, v[14:17], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=v"()
@@ -1143,36 +1071,37 @@ define void @v_shuffle_v2i64_v8i64__15_9(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v14
-; GFX900-NEXT: v_mov_b32_e32 v1, v15
-; GFX900-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v16, v2
+; GFX900-NEXT: v_mov_b32_e32 v17, v3
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
+; GFX900-NEXT: global_store_dwordx4 v0, v[14:17], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i64_v8i64__15_9:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v18, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v14
-; GFX90A-NEXT: v_mov_b32_e32 v1, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v16, v2
+; GFX90A-NEXT: v_mov_b32_e32 v17, v3
+; GFX90A-NEXT: global_store_dwordx4 v18, v[14:17], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v8i64__15_9:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v18, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v14
-; GFX942-NEXT: v_mov_b32_e32 v1, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v16, v2
+; GFX942-NEXT: v_mov_b32_e32 v17, v3
+; GFX942-NEXT: global_store_dwordx4 v18, v[14:17], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=v"()
@@ -1189,36 +1118,37 @@ define void @v_shuffle_v2i64_v8i64__15_10(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v14
-; GFX900-NEXT: v_mov_b32_e32 v3, v15
-; GFX900-NEXT: global_store_dwordx4 v16, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v16, v4
+; GFX900-NEXT: v_mov_b32_e32 v17, v5
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
+; GFX900-NEXT: global_store_dwordx4 v0, v[14:17], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i64_v8i64__15_10:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v18, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v14
-; GFX90A-NEXT: v_mov_b32_e32 v3, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v16, v4
+; GFX90A-NEXT: v_mov_b32_e32 v17, v5
+; GFX90A-NEXT: global_store_dwordx4 v18, v[14:17], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v8i64__15_10:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v18, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v14
-; GFX942-NEXT: v_mov_b32_e32 v3, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v16, v4
+; GFX942-NEXT: v_mov_b32_e32 v17, v5
+; GFX942-NEXT: global_store_dwordx4 v18, v[14:17], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=v"()
@@ -1235,36 +1165,37 @@ define void @v_shuffle_v2i64_v8i64__15_11(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v14
-; GFX900-NEXT: v_mov_b32_e32 v5, v15
-; GFX900-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v16, v6
+; GFX900-NEXT: v_mov_b32_e32 v17, v7
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
+; GFX900-NEXT: global_store_dwordx4 v0, v[14:17], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i64_v8i64__15_11:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v18, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v14
-; GFX90A-NEXT: v_mov_b32_e32 v5, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v16, v6
+; GFX90A-NEXT: v_mov_b32_e32 v17, v7
+; GFX90A-NEXT: global_store_dwordx4 v18, v[14:17], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v8i64__15_11:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v18, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v14
-; GFX942-NEXT: v_mov_b32_e32 v5, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v16, v6
+; GFX942-NEXT: v_mov_b32_e32 v17, v7
+; GFX942-NEXT: global_store_dwordx4 v18, v[14:17], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=v"()
@@ -1281,36 +1212,37 @@ define void @v_shuffle_v2i64_v8i64__15_12(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v14
-; GFX900-NEXT: v_mov_b32_e32 v7, v15
-; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v16, v8
+; GFX900-NEXT: v_mov_b32_e32 v17, v9
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
+; GFX900-NEXT: global_store_dwordx4 v0, v[14:17], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i64_v8i64__15_12:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v18, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v14
-; GFX90A-NEXT: v_mov_b32_e32 v7, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v16, v8
+; GFX90A-NEXT: v_mov_b32_e32 v17, v9
+; GFX90A-NEXT: global_store_dwordx4 v18, v[14:17], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v8i64__15_12:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v18, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v14
-; GFX942-NEXT: v_mov_b32_e32 v7, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v16, v8
+; GFX942-NEXT: v_mov_b32_e32 v17, v9
+; GFX942-NEXT: global_store_dwordx4 v18, v[14:17], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=v"()
@@ -1327,36 +1259,37 @@ define void @v_shuffle_v2i64_v8i64__15_13(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
-; GFX900-NEXT: v_mov_b32_e32 v8, v14
-; GFX900-NEXT: v_mov_b32_e32 v9, v15
-; GFX900-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v16, v10
+; GFX900-NEXT: v_mov_b32_e32 v17, v11
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
+; GFX900-NEXT: global_store_dwordx4 v0, v[14:17], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i64_v8i64__15_13:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v18, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v14
-; GFX90A-NEXT: v_mov_b32_e32 v9, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v16, v10
+; GFX90A-NEXT: v_mov_b32_e32 v17, v11
+; GFX90A-NEXT: global_store_dwordx4 v18, v[14:17], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v8i64__15_13:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v18, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v14
-; GFX942-NEXT: v_mov_b32_e32 v9, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v16, v10
+; GFX942-NEXT: v_mov_b32_e32 v17, v11
+; GFX942-NEXT: global_store_dwordx4 v18, v[14:17], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=v"()
@@ -1373,36 +1306,37 @@ define void @v_shuffle_v2i64_v8i64__15_14(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
-; GFX900-NEXT: v_mov_b32_e32 v10, v14
-; GFX900-NEXT: v_mov_b32_e32 v11, v15
-; GFX900-NEXT: global_store_dwordx4 v16, v[10:13], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v16, v12
+; GFX900-NEXT: v_mov_b32_e32 v17, v13
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
+; GFX900-NEXT: global_store_dwordx4 v0, v[14:17], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i64_v8i64__15_14:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v18, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v10, v14
-; GFX90A-NEXT: v_mov_b32_e32 v11, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[10:13], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v16, v12
+; GFX90A-NEXT: v_mov_b32_e32 v17, v13
+; GFX90A-NEXT: global_store_dwordx4 v18, v[14:17], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v8i64__15_14:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v18, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v14
-; GFX942-NEXT: v_mov_b32_e32 v11, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v16, v12
+; GFX942-NEXT: v_mov_b32_e32 v17, v13
+; GFX942-NEXT: global_store_dwordx4 v18, v[14:17], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=v"()
@@ -1419,36 +1353,37 @@ define void @v_shuffle_v2i64_v8i64__15_15(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
-; GFX900-NEXT: v_mov_b32_e32 v12, v14
-; GFX900-NEXT: v_mov_b32_e32 v13, v15
-; GFX900-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v16, v14
+; GFX900-NEXT: v_mov_b32_e32 v17, v15
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
+; GFX900-NEXT: global_store_dwordx4 v0, v[14:17], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i64_v8i64__15_15:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v18, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v12, v14
-; GFX90A-NEXT: v_mov_b32_e32 v13, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v16, v14
+; GFX90A-NEXT: v_mov_b32_e32 v17, v15
+; GFX90A-NEXT: global_store_dwordx4 v18, v[14:17], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v8i64__15_15:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v18, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v14
-; GFX942-NEXT: v_mov_b32_e32 v13, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v16, v14
+; GFX942-NEXT: v_mov_b32_e32 v17, v15
+; GFX942-NEXT: global_store_dwordx4 v18, v[14:17], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=v"()
@@ -1462,12 +1397,10 @@ define void @v_shuffle_v2i64_v8i64__u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i64_v8i64__u_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:15]
+; GFX900-NEXT: ; def v[2:17]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: v_mov_b32_e32 v0, 0
; GFX900-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
@@ -1475,26 +1408,22 @@ define void @v_shuffle_v2i64_v8i64__u_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-LABEL: v_shuffle_v2i64_v8i64__u_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:15]
+; GFX90A-NEXT: ; def v[2:17]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v8i64__u_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:15]
+; GFX942-NEXT: ; def v[2:17]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v0, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=v"()
@@ -1825,42 +1754,37 @@ define void @v_shuffle_v2i64_v8i64__7_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, v14
-; GFX900-NEXT: v_mov_b32_e32 v3, v15
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: v_mov_b32_e32 v16, v0
+; GFX900-NEXT: v_mov_b32_e32 v17, v1
; GFX900-NEXT: v_mov_b32_e32 v0, 0
-; GFX900-NEXT: global_store_dwordx4 v0, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v0, v[14:17], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i64_v8i64__7_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v18, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v14
-; GFX90A-NEXT: v_mov_b32_e32 v3, v15
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v16, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v16, v0
+; GFX90A-NEXT: v_mov_b32_e32 v17, v1
+; GFX90A-NEXT: global_store_dwordx4 v18, v[14:17], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v8i64__7_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v18, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v14
-; GFX942-NEXT: v_mov_b32_e32 v3, v15
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v16, v0
+; GFX942-NEXT: v_mov_b32_e32 v17, v1
+; GFX942-NEXT: global_store_dwordx4 v18, v[14:17], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=v"()
@@ -1873,12 +1797,10 @@ define void @v_shuffle_v2i64_v8i64__8_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i64_v8i64__8_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:15]
+; GFX900-NEXT: ; def v[2:17]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: v_mov_b32_e32 v0, 0
; GFX900-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
@@ -1886,26 +1808,22 @@ define void @v_shuffle_v2i64_v8i64__8_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-LABEL: v_shuffle_v2i64_v8i64__8_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:15]
+; GFX90A-NEXT: ; def v[2:17]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v8i64__8_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:15]
+; GFX942-NEXT: ; def v[2:17]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v0, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=v"()
@@ -2605,36 +2523,37 @@ define void @v_shuffle_v2i64_v8i64__7_1(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v14
-; GFX900-NEXT: v_mov_b32_e32 v1, v15
-; GFX900-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v16, v2
+; GFX900-NEXT: v_mov_b32_e32 v17, v3
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
+; GFX900-NEXT: global_store_dwordx4 v0, v[14:17], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i64_v8i64__7_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v18, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v14
-; GFX90A-NEXT: v_mov_b32_e32 v1, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v16, v2
+; GFX90A-NEXT: v_mov_b32_e32 v17, v3
+; GFX90A-NEXT: global_store_dwordx4 v18, v[14:17], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v8i64__7_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v18, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v14
-; GFX942-NEXT: v_mov_b32_e32 v1, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v16, v2
+; GFX942-NEXT: v_mov_b32_e32 v17, v3
+; GFX942-NEXT: global_store_dwordx4 v18, v[14:17], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=v"()
@@ -3373,36 +3292,37 @@ define void @v_shuffle_v2i64_v8i64__7_2(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v14
-; GFX900-NEXT: v_mov_b32_e32 v3, v15
-; GFX900-NEXT: global_store_dwordx4 v16, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v16, v4
+; GFX900-NEXT: v_mov_b32_e32 v17, v5
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
+; GFX900-NEXT: global_store_dwordx4 v0, v[14:17], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i64_v8i64__7_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v18, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v14
-; GFX90A-NEXT: v_mov_b32_e32 v3, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v16, v4
+; GFX90A-NEXT: v_mov_b32_e32 v17, v5
+; GFX90A-NEXT: global_store_dwordx4 v18, v[14:17], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v8i64__7_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v18, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v14
-; GFX942-NEXT: v_mov_b32_e32 v3, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v16, v4
+; GFX942-NEXT: v_mov_b32_e32 v17, v5
+; GFX942-NEXT: global_store_dwordx4 v18, v[14:17], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=v"()
@@ -4141,36 +4061,37 @@ define void @v_shuffle_v2i64_v8i64__7_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v14
-; GFX900-NEXT: v_mov_b32_e32 v5, v15
-; GFX900-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v16, v6
+; GFX900-NEXT: v_mov_b32_e32 v17, v7
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
+; GFX900-NEXT: global_store_dwordx4 v0, v[14:17], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i64_v8i64__7_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v18, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v14
-; GFX90A-NEXT: v_mov_b32_e32 v5, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v16, v6
+; GFX90A-NEXT: v_mov_b32_e32 v17, v7
+; GFX90A-NEXT: global_store_dwordx4 v18, v[14:17], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v8i64__7_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v18, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v14
-; GFX942-NEXT: v_mov_b32_e32 v5, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v16, v6
+; GFX942-NEXT: v_mov_b32_e32 v17, v7
+; GFX942-NEXT: global_store_dwordx4 v18, v[14:17], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=v"()
@@ -4909,36 +4830,37 @@ define void @v_shuffle_v2i64_v8i64__7_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v14
-; GFX900-NEXT: v_mov_b32_e32 v7, v15
-; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v16, v8
+; GFX900-NEXT: v_mov_b32_e32 v17, v9
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
+; GFX900-NEXT: global_store_dwordx4 v0, v[14:17], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i64_v8i64__7_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v18, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v14
-; GFX90A-NEXT: v_mov_b32_e32 v7, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v16, v8
+; GFX90A-NEXT: v_mov_b32_e32 v17, v9
+; GFX90A-NEXT: global_store_dwordx4 v18, v[14:17], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v8i64__7_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v18, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v14
-; GFX942-NEXT: v_mov_b32_e32 v7, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v16, v8
+; GFX942-NEXT: v_mov_b32_e32 v17, v9
+; GFX942-NEXT: global_store_dwordx4 v18, v[14:17], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=v"()
@@ -5677,36 +5599,37 @@ define void @v_shuffle_v2i64_v8i64__7_5(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
-; GFX900-NEXT: v_mov_b32_e32 v8, v14
-; GFX900-NEXT: v_mov_b32_e32 v9, v15
-; GFX900-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v16, v10
+; GFX900-NEXT: v_mov_b32_e32 v17, v11
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
+; GFX900-NEXT: global_store_dwordx4 v0, v[14:17], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i64_v8i64__7_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v18, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v14
-; GFX90A-NEXT: v_mov_b32_e32 v9, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v16, v10
+; GFX90A-NEXT: v_mov_b32_e32 v17, v11
+; GFX90A-NEXT: global_store_dwordx4 v18, v[14:17], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v8i64__7_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v18, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v14
-; GFX942-NEXT: v_mov_b32_e32 v9, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v16, v10
+; GFX942-NEXT: v_mov_b32_e32 v17, v11
+; GFX942-NEXT: global_store_dwordx4 v18, v[14:17], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=v"()
@@ -6445,36 +6368,37 @@ define void @v_shuffle_v2i64_v8i64__7_6(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
-; GFX900-NEXT: v_mov_b32_e32 v10, v14
-; GFX900-NEXT: v_mov_b32_e32 v11, v15
-; GFX900-NEXT: global_store_dwordx4 v16, v[10:13], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v16, v12
+; GFX900-NEXT: v_mov_b32_e32 v17, v13
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
+; GFX900-NEXT: global_store_dwordx4 v0, v[14:17], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i64_v8i64__7_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v18, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v10, v14
-; GFX90A-NEXT: v_mov_b32_e32 v11, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[10:13], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v16, v12
+; GFX90A-NEXT: v_mov_b32_e32 v17, v13
+; GFX90A-NEXT: global_store_dwordx4 v18, v[14:17], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v8i64__7_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v18, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v14
-; GFX942-NEXT: v_mov_b32_e32 v11, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v16, v12
+; GFX942-NEXT: v_mov_b32_e32 v17, v13
+; GFX942-NEXT: global_store_dwordx4 v18, v[14:17], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=v"()
@@ -7213,36 +7137,37 @@ define void @v_shuffle_v2i64_v8i64__7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
-; GFX900-NEXT: v_mov_b32_e32 v12, v14
-; GFX900-NEXT: v_mov_b32_e32 v13, v15
-; GFX900-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v16, v14
+; GFX900-NEXT: v_mov_b32_e32 v17, v15
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
+; GFX900-NEXT: global_store_dwordx4 v0, v[14:17], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i64_v8i64__7_7:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v18, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v12, v14
-; GFX90A-NEXT: v_mov_b32_e32 v13, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v16, v14
+; GFX90A-NEXT: v_mov_b32_e32 v17, v15
+; GFX90A-NEXT: global_store_dwordx4 v18, v[14:17], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v8i64__7_7:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v18, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v14
-; GFX942-NEXT: v_mov_b32_e32 v13, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v16, v14
+; GFX942-NEXT: v_mov_b32_e32 v17, v15
+; GFX942-NEXT: global_store_dwordx4 v18, v[14:17], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=v"()
@@ -7914,39 +7839,33 @@ define void @v_shuffle_v2i64_v8i64__7_8(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2i64_v8i64__7_8:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v14
-; GFX900-NEXT: v_mov_b32_e32 v1, v15
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-NEXT: global_store_dwordx4 v2, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v16, v[14:17], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2i64_v8i64__7_8:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v14
-; GFX90A-NEXT: v_mov_b32_e32 v1, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v16, v[14:17], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2i64_v8i64__7_8:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v14
-; GFX942-NEXT: v_mov_b32_e32 v1, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v16, v[14:17], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=v"()
@@ -8679,15 +8598,15 @@ define void @v_shuffle_v2i64_v8i64__7_9(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:17]
+; GFX900-NEXT: ; def v[0:15]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v16, v2
+; GFX900-NEXT: v_mov_b32_e32 v17, v3
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v16
-; GFX900-NEXT: v_mov_b32_e32 v1, v17
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
+; GFX900-NEXT: global_store_dwordx4 v0, v[14:17], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -8695,15 +8614,15 @@ define void @v_shuffle_v2i64_v8i64__7_9(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:17]
+; GFX90A-NEXT: ; def v[16:31]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v32, 0
+; GFX90A-NEXT: v_mov_b32_e32 v16, v18
+; GFX90A-NEXT: v_mov_b32_e32 v17, v19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v18, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v16
-; GFX90A-NEXT: v_mov_b32_e32 v1, v17
-; GFX90A-NEXT: global_store_dwordx4 v18, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v32, v[14:17], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -8711,16 +8630,15 @@ define void @v_shuffle_v2i64_v8i64__7_9(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:17]
+; GFX942-NEXT: ; def v[16:31]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v18, 0
+; GFX942-NEXT: v_mov_b32_e32 v32, 0
+; GFX942-NEXT: v_mov_b32_e32 v16, v18
+; GFX942-NEXT: v_mov_b32_e32 v17, v19
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v16
-; GFX942-NEXT: v_mov_b32_e32 v1, v17
-; GFX942-NEXT: global_store_dwordx4 v18, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v32, v[14:17], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=v"()
@@ -9483,15 +9401,15 @@ define void @v_shuffle_v2i64_v8i64__7_10(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:17]
+; GFX900-NEXT: ; def v[0:15]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v16, v4
+; GFX900-NEXT: v_mov_b32_e32 v17, v5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, v16
-; GFX900-NEXT: v_mov_b32_e32 v3, v17
; GFX900-NEXT: v_mov_b32_e32 v0, 0
-; GFX900-NEXT: global_store_dwordx4 v0, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v0, v[14:17], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -9499,15 +9417,15 @@ define void @v_shuffle_v2i64_v8i64__7_10(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:17]
+; GFX90A-NEXT: ; def v[16:31]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v32, 0
+; GFX90A-NEXT: v_mov_b32_e32 v16, v20
+; GFX90A-NEXT: v_mov_b32_e32 v17, v21
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v18, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v16
-; GFX90A-NEXT: v_mov_b32_e32 v3, v17
-; GFX90A-NEXT: global_store_dwordx4 v18, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v32, v[14:17], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -9515,16 +9433,15 @@ define void @v_shuffle_v2i64_v8i64__7_10(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:17]
+; GFX942-NEXT: ; def v[16:31]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v18, 0
+; GFX942-NEXT: v_mov_b32_e32 v32, 0
+; GFX942-NEXT: v_mov_b32_e32 v16, v20
+; GFX942-NEXT: v_mov_b32_e32 v17, v21
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v16
-; GFX942-NEXT: v_mov_b32_e32 v3, v17
-; GFX942-NEXT: global_store_dwordx4 v18, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v32, v[14:17], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=v"()
@@ -10287,15 +10204,15 @@ define void @v_shuffle_v2i64_v8i64__7_11(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:17]
+; GFX900-NEXT: ; def v[0:15]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v16, v6
+; GFX900-NEXT: v_mov_b32_e32 v17, v7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v16
-; GFX900-NEXT: v_mov_b32_e32 v5, v17
; GFX900-NEXT: v_mov_b32_e32 v0, 0
-; GFX900-NEXT: global_store_dwordx4 v0, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v0, v[14:17], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -10303,15 +10220,15 @@ define void @v_shuffle_v2i64_v8i64__7_11(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:17]
+; GFX90A-NEXT: ; def v[16:31]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v32, 0
+; GFX90A-NEXT: v_mov_b32_e32 v16, v22
+; GFX90A-NEXT: v_mov_b32_e32 v17, v23
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v18, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v16
-; GFX90A-NEXT: v_mov_b32_e32 v5, v17
-; GFX90A-NEXT: global_store_dwordx4 v18, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v32, v[14:17], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -10319,16 +10236,15 @@ define void @v_shuffle_v2i64_v8i64__7_11(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:17]
+; GFX942-NEXT: ; def v[16:31]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v18, 0
+; GFX942-NEXT: v_mov_b32_e32 v32, 0
+; GFX942-NEXT: v_mov_b32_e32 v16, v22
+; GFX942-NEXT: v_mov_b32_e32 v17, v23
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v16
-; GFX942-NEXT: v_mov_b32_e32 v5, v17
-; GFX942-NEXT: global_store_dwordx4 v18, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v32, v[14:17], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=v"()
@@ -11091,15 +11007,15 @@ define void @v_shuffle_v2i64_v8i64__7_12(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:17]
+; GFX900-NEXT: ; def v[0:15]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v16, v8
+; GFX900-NEXT: v_mov_b32_e32 v17, v9
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, v16
-; GFX900-NEXT: v_mov_b32_e32 v7, v17
; GFX900-NEXT: v_mov_b32_e32 v0, 0
-; GFX900-NEXT: global_store_dwordx4 v0, v[6:9], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v0, v[14:17], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -11107,15 +11023,15 @@ define void @v_shuffle_v2i64_v8i64__7_12(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:17]
+; GFX90A-NEXT: ; def v[16:31]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v32, 0
+; GFX90A-NEXT: v_mov_b32_e32 v16, v24
+; GFX90A-NEXT: v_mov_b32_e32 v17, v25
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v18, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v16
-; GFX90A-NEXT: v_mov_b32_e32 v7, v17
-; GFX90A-NEXT: global_store_dwordx4 v18, v[6:9], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v32, v[14:17], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -11123,16 +11039,15 @@ define void @v_shuffle_v2i64_v8i64__7_12(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:17]
+; GFX942-NEXT: ; def v[16:31]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v18, 0
+; GFX942-NEXT: v_mov_b32_e32 v32, 0
+; GFX942-NEXT: v_mov_b32_e32 v16, v24
+; GFX942-NEXT: v_mov_b32_e32 v17, v25
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v16
-; GFX942-NEXT: v_mov_b32_e32 v7, v17
-; GFX942-NEXT: global_store_dwordx4 v18, v[6:9], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v32, v[14:17], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=v"()
@@ -11895,15 +11810,15 @@ define void @v_shuffle_v2i64_v8i64__7_13(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:17]
+; GFX900-NEXT: ; def v[0:15]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v16, v10
+; GFX900-NEXT: v_mov_b32_e32 v17, v11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, v16
-; GFX900-NEXT: v_mov_b32_e32 v9, v17
; GFX900-NEXT: v_mov_b32_e32 v0, 0
-; GFX900-NEXT: global_store_dwordx4 v0, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v0, v[14:17], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -11911,15 +11826,15 @@ define void @v_shuffle_v2i64_v8i64__7_13(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:17]
+; GFX90A-NEXT: ; def v[16:31]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v32, 0
+; GFX90A-NEXT: v_mov_b32_e32 v16, v26
+; GFX90A-NEXT: v_mov_b32_e32 v17, v27
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v18, 0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v16
-; GFX90A-NEXT: v_mov_b32_e32 v9, v17
-; GFX90A-NEXT: global_store_dwordx4 v18, v[8:11], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v32, v[14:17], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -11927,16 +11842,15 @@ define void @v_shuffle_v2i64_v8i64__7_13(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:17]
+; GFX942-NEXT: ; def v[16:31]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v18, 0
+; GFX942-NEXT: v_mov_b32_e32 v32, 0
+; GFX942-NEXT: v_mov_b32_e32 v16, v26
+; GFX942-NEXT: v_mov_b32_e32 v17, v27
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v16
-; GFX942-NEXT: v_mov_b32_e32 v9, v17
-; GFX942-NEXT: global_store_dwordx4 v18, v[8:11], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v32, v[14:17], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=v"()
@@ -12699,15 +12613,15 @@ define void @v_shuffle_v2i64_v8i64__7_14(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:17]
+; GFX900-NEXT: ; def v[0:15]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v16, v12
+; GFX900-NEXT: v_mov_b32_e32 v17, v13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, v16
-; GFX900-NEXT: v_mov_b32_e32 v11, v17
; GFX900-NEXT: v_mov_b32_e32 v0, 0
-; GFX900-NEXT: global_store_dwordx4 v0, v[10:13], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v0, v[14:17], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -12715,15 +12629,15 @@ define void @v_shuffle_v2i64_v8i64__7_14(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:17]
+; GFX90A-NEXT: ; def v[16:31]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v32, 0
+; GFX90A-NEXT: v_mov_b32_e32 v16, v28
+; GFX90A-NEXT: v_mov_b32_e32 v17, v29
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v18, 0
-; GFX90A-NEXT: v_mov_b32_e32 v10, v16
-; GFX90A-NEXT: v_mov_b32_e32 v11, v17
-; GFX90A-NEXT: global_store_dwordx4 v18, v[10:13], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v32, v[14:17], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -12731,16 +12645,15 @@ define void @v_shuffle_v2i64_v8i64__7_14(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:17]
+; GFX942-NEXT: ; def v[16:31]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v18, 0
+; GFX942-NEXT: v_mov_b32_e32 v32, 0
+; GFX942-NEXT: v_mov_b32_e32 v16, v28
+; GFX942-NEXT: v_mov_b32_e32 v17, v29
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v16
-; GFX942-NEXT: v_mov_b32_e32 v11, v17
-; GFX942-NEXT: global_store_dwordx4 v18, v[10:13], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v32, v[14:17], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=v"()
@@ -13503,15 +13416,15 @@ define void @v_shuffle_v2i64_v8i64__7_15(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:17]
+; GFX900-NEXT: ; def v[0:15]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v16, v14
+; GFX900-NEXT: v_mov_b32_e32 v17, v15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, v16
-; GFX900-NEXT: v_mov_b32_e32 v13, v17
; GFX900-NEXT: v_mov_b32_e32 v0, 0
-; GFX900-NEXT: global_store_dwordx4 v0, v[12:15], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v0, v[14:17], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -13519,15 +13432,15 @@ define void @v_shuffle_v2i64_v8i64__7_15(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:17]
+; GFX90A-NEXT: ; def v[16:31]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v32, 0
+; GFX90A-NEXT: v_mov_b32_e32 v16, v30
+; GFX90A-NEXT: v_mov_b32_e32 v17, v31
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v18, 0
-; GFX90A-NEXT: v_mov_b32_e32 v12, v16
-; GFX90A-NEXT: v_mov_b32_e32 v13, v17
-; GFX90A-NEXT: global_store_dwordx4 v18, v[12:15], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v32, v[14:17], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -13535,16 +13448,15 @@ define void @v_shuffle_v2i64_v8i64__7_15(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:17]
+; GFX942-NEXT: ; def v[16:31]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v18, 0
+; GFX942-NEXT: v_mov_b32_e32 v32, 0
+; GFX942-NEXT: v_mov_b32_e32 v16, v30
+; GFX942-NEXT: v_mov_b32_e32 v17, v31
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v16
-; GFX942-NEXT: v_mov_b32_e32 v13, v17
-; GFX942-NEXT: global_store_dwordx4 v18, v[12:15], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v32, v[14:17], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=v"()
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v2p0.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v2p0.ll
index 2ecbf9622a259..411d8b735b9b6 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v2p0.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v2p0.ll
@@ -58,39 +58,33 @@ define void @v_shuffle_v2p0_v2p0__1_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p0_v2p0__1_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2p0_v2p0__1_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2p0_v2p0__1_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -114,39 +108,33 @@ define void @v_shuffle_v2p0_v2p0__3_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p0_v2p0__3_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2p0_v2p0__3_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2p0_v2p0__3_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -160,55 +148,42 @@ define void @v_shuffle_v2p0_v2p0__3_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p0_v2p0__3_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2p0_v2p0__3_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2p0_v2p0__3_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -222,49 +197,43 @@ define void @v_shuffle_v2p0_v2p0__3_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p0_v2p0__3_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2p0_v2p0__3_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2p0_v2p0__3_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -291,31 +260,27 @@ define void @v_shuffle_v2p0_v2p0__3_2(ptr addrspace(1) inreg %ptr) {
; GFX90A-LABEL: v_shuffle_v2p0_v2p0__3_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2p0_v2p0__3_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -329,39 +294,40 @@ define void @v_shuffle_v2p0_v2p0__3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p0_v2p0__3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2p0_v2p0__3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2p0_v2p0__3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -375,39 +341,33 @@ define void @v_shuffle_v2p0_v2p0__u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p0_v2p0__u_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2p0_v2p0__u_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2p0_v2p0__u_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v0, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -478,31 +438,27 @@ define void @v_shuffle_v2p0_v2p0__1_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-LABEL: v_shuffle_v2p0_v2p0__1_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2p0_v2p0__1_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -515,39 +471,33 @@ define void @v_shuffle_v2p0_v2p0__2_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p0_v2p0__2_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2p0_v2p0__2_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2p0_v2p0__2_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v0, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -638,39 +588,40 @@ define void @v_shuffle_v2p0_v2p0__1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p0_v2p0__1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2p0_v2p0__1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2p0_v2p0__1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -772,39 +723,33 @@ define void @v_shuffle_v2p0_v2p0__1_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p0_v2p0__1_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2p0_v2p0__1_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2p0_v2p0__1_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -928,12 +873,12 @@ define void @v_shuffle_v2p0_v2p0__1_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v6
+; GFX900-NEXT: v_mov_b32_e32 v5, v7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v2
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -944,12 +889,12 @@ define void @v_shuffle_v2p0_v2p0__1_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v6
+; GFX90A-NEXT: v_mov_b32_e32 v5, v7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v3
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -960,13 +905,12 @@ define void @v_shuffle_v2p0_v2p0__1_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v6
+; GFX942-NEXT: v_mov_b32_e32 v5, v7
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v3
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v3p0.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v3p0.ll
index 27a6cf11c4cb1..385dc73531d14 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v3p0.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v3p0.ll
@@ -97,39 +97,33 @@ define void @v_shuffle_v2p0_v3p0__2_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p0_v3p0__2_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2p0_v3p0__2_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2p0_v3p0__2_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -193,39 +187,33 @@ define void @v_shuffle_v2p0_v3p0__5_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p0_v3p0__5_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2p0_v3p0__5_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2p0_v3p0__5_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -239,55 +227,42 @@ define void @v_shuffle_v2p0_v3p0__5_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p0_v3p0__5_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:7]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2p0_v3p0__5_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:7]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2p0_v3p0__5_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:7]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -301,49 +276,43 @@ define void @v_shuffle_v2p0_v3p0__5_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p0_v3p0__5_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
-; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v8
-; GFX900-NEXT: v_mov_b32_e32 v1, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2p0_v3p0__5_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
-; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v8
-; GFX90A-NEXT: v_mov_b32_e32 v1, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2p0_v3p0__5_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
-; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v8
-; GFX942-NEXT: v_mov_b32_e32 v1, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -357,49 +326,43 @@ define void @v_shuffle_v2p0_v3p0__5_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p0_v3p0__5_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[2:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:11]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, v10
-; GFX900-NEXT: v_mov_b32_e32 v3, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2p0_v3p0__5_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[2:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:11]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, v10
-; GFX90A-NEXT: v_mov_b32_e32 v3, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2p0_v3p0__5_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:11]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v10
-; GFX942-NEXT: v_mov_b32_e32 v3, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -413,45 +376,40 @@ define void @v_shuffle_v2p0_v3p0__5_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p0_v3p0__5_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: v_mov_b32_e32 v7, v1
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2p0_v3p0__5_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v0
+; GFX90A-NEXT: v_mov_b32_e32 v7, v1
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2p0_v3p0__5_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v0
+; GFX942-NEXT: v_mov_b32_e32 v7, v1
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -465,39 +423,40 @@ define void @v_shuffle_v2p0_v3p0__5_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p0_v3p0__5_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: v_mov_b32_e32 v7, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2p0_v3p0__5_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v2
+; GFX90A-NEXT: v_mov_b32_e32 v7, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2p0_v3p0__5_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v2
+; GFX942-NEXT: v_mov_b32_e32 v7, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -511,39 +470,40 @@ define void @v_shuffle_v2p0_v3p0__5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p0_v3p0__5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2p0_v3p0__5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2p0_v3p0__5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -557,39 +517,33 @@ define void @v_shuffle_v2p0_v3p0__u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p0_v3p0__u_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[2:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2p0_v3p0__u_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[2:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2p0_v3p0__u_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v0, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -692,45 +646,40 @@ define void @v_shuffle_v2p0_v3p0__2_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p0_v3p0__2_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: v_mov_b32_e32 v7, v1
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2p0_v3p0__2_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v0
+; GFX90A-NEXT: v_mov_b32_e32 v7, v1
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2p0_v3p0__2_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v0
+; GFX942-NEXT: v_mov_b32_e32 v7, v1
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -743,39 +692,33 @@ define void @v_shuffle_v2p0_v3p0__3_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p0_v3p0__3_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[2:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2p0_v3p0__3_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[2:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2p0_v3p0__3_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v0, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -967,39 +910,40 @@ define void @v_shuffle_v2p0_v3p0__2_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p0_v3p0__2_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: v_mov_b32_e32 v7, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2p0_v3p0__2_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v2
+; GFX90A-NEXT: v_mov_b32_e32 v7, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2p0_v3p0__2_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v2
+; GFX942-NEXT: v_mov_b32_e32 v7, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -1230,39 +1174,40 @@ define void @v_shuffle_v2p0_v3p0__2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p0_v3p0__2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2p0_v3p0__2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2p0_v3p0__2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -1459,39 +1404,33 @@ define void @v_shuffle_v2p0_v3p0__2_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p0_v3p0__2_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2p0_v3p0__2_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2p0_v3p0__2_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -1717,12 +1656,12 @@ define void @v_shuffle_v2p0_v3p0__2_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v8
+; GFX900-NEXT: v_mov_b32_e32 v7, v9
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, v4
-; GFX900-NEXT: v_mov_b32_e32 v7, v5
-; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1733,12 +1672,12 @@ define void @v_shuffle_v2p0_v3p0__2_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, v8
+; GFX90A-NEXT: v_mov_b32_e32 v7, v9
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v5
-; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1749,13 +1688,12 @@ define void @v_shuffle_v2p0_v3p0__2_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v8
+; GFX942-NEXT: v_mov_b32_e32 v7, v9
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v5
-; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -2011,12 +1949,12 @@ define void @v_shuffle_v2p0_v3p0__2_5(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v10
+; GFX900-NEXT: v_mov_b32_e32 v7, v11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, v4
-; GFX900-NEXT: v_mov_b32_e32 v9, v5
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2027,12 +1965,12 @@ define void @v_shuffle_v2p0_v3p0__2_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, v10
+; GFX90A-NEXT: v_mov_b32_e32 v7, v11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, v4
-; GFX90A-NEXT: v_mov_b32_e32 v9, v5
-; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2043,13 +1981,12 @@ define void @v_shuffle_v2p0_v3p0__2_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v10
+; GFX942-NEXT: v_mov_b32_e32 v7, v11
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v4
-; GFX942-NEXT: v_mov_b32_e32 v9, v5
-; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -2271,10 +2208,9 @@ define void @s_shuffle_v2p0_v3p0__2_u() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -2372,10 +2308,9 @@ define void @s_shuffle_v2p0_v3p0__5_u() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -2392,15 +2327,13 @@ define void @s_shuffle_v2p0_v3p0__5_0() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
; GFX900-NEXT: ;;#ASMEND
@@ -2410,15 +2343,13 @@ define void @s_shuffle_v2p0_v3p0__5_0() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
; GFX90A-NEXT: ;;#ASMEND
@@ -2481,11 +2412,11 @@ define void @s_shuffle_v2p0_v3p0__5_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -2536,13 +2467,11 @@ define void @s_shuffle_v2p0_v3p0__5_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s10, s12
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b32 s11, s13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -2555,46 +2484,18 @@ define void @s_shuffle_v2p0_v3p0__5_2() {
}
define void @s_shuffle_v2p0_v3p0__5_3() {
-; GFX900-LABEL: s_shuffle_v2p0_v3p0__5_3:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v2p0_v3p0__5_3:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v2p0_v3p0__5_3:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v2p0_v3p0__5_3:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s4
+; GFX9-NEXT: s_mov_b32 s11, s5
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <2 x i32> <i32 5, i32 3>
@@ -2607,10 +2508,10 @@ define void @s_shuffle_v2p0_v3p0__5_4() {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
+; GFX9-NEXT: ; def s[4:9]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
+; GFX9-NEXT: s_mov_b32 s10, s6
+; GFX9-NEXT: s_mov_b32 s11, s7
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:11]
; GFX9-NEXT: ;;#ASMEND
@@ -2623,50 +2524,18 @@ define void @s_shuffle_v2p0_v3p0__5_4() {
}
define void @s_shuffle_v2p0_v3p0__5_5() {
-; GFX900-LABEL: s_shuffle_v2p0_v3p0__5_5:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v2p0_v3p0__5_5:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v2p0_v3p0__5_5:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v2p0_v3p0__5_5:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <2 x i32> <i32 5, i32 5>
@@ -2790,46 +2659,18 @@ define void @s_shuffle_v2p0_v3p0__1_0() {
}
define void @s_shuffle_v2p0_v3p0__2_0() {
-; GFX900-LABEL: s_shuffle_v2p0_v3p0__2_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v2p0_v3p0__2_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v2p0_v3p0__2_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v2p0_v3p0__2_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s4
+; GFX9-NEXT: s_mov_b32 s11, s5
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <2 x i32> <i32 2, i32 0>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf)
@@ -3046,10 +2887,10 @@ define void @s_shuffle_v2p0_v3p0__2_1() {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
+; GFX9-NEXT: ; def s[4:9]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
+; GFX9-NEXT: s_mov_b32 s10, s6
+; GFX9-NEXT: s_mov_b32 s11, s7
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:11]
; GFX9-NEXT: ;;#ASMEND
@@ -3271,50 +3112,18 @@ define void @s_shuffle_v2p0_v3p0__1_2() {
}
define void @s_shuffle_v2p0_v3p0__2_2() {
-; GFX900-LABEL: s_shuffle_v2p0_v3p0__2_2:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v2p0_v3p0__2_2:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v2p0_v3p0__2_2:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v2p0_v3p0__2_2:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <2 x i32> <i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf)
@@ -3553,10 +3362,9 @@ define void @s_shuffle_v2p0_v3p0__2_3() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -3789,13 +3597,13 @@ define void @s_shuffle_v2p0_v3p0__2_4() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
; GFX900-NEXT: ;;#ASMEND
@@ -3805,13 +3613,13 @@ define void @s_shuffle_v2p0_v3p0__2_4() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
; GFX90A-NEXT: ;;#ASMEND
@@ -3821,13 +3629,14 @@ define void @s_shuffle_v2p0_v3p0__2_4() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b32 s10, s2
+; GFX942-NEXT: s_mov_b32 s11, s3
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -4099,15 +3908,14 @@ define void @s_shuffle_v2p0_v3p0__2_5() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b32 s10, s4
+; GFX942-NEXT: s_mov_b32 s11, s5
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v4p0.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v4p0.ll
index ae31524ebaa7f..70d72571b9897 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v4p0.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v4p0.ll
@@ -136,39 +136,33 @@ define void @v_shuffle_v2p0_v4p0__3_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p0_v4p0__3_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2p0_v4p0__3_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2p0_v4p0__3_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -272,39 +266,33 @@ define void @v_shuffle_v2p0_v4p0__7_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p0_v4p0__7_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2p0_v4p0__7_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2p0_v4p0__7_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -318,55 +306,42 @@ define void @v_shuffle_v2p0_v4p0__7_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p0_v4p0__7_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v8, v6
-; GFX900-NEXT: v_mov_b32_e32 v9, v7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, v0
-; GFX900-NEXT: v_mov_b32_e32 v11, v1
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2p0_v4p0__7_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:9]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v8
-; GFX90A-NEXT: v_mov_b32_e32 v3, v9
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2p0_v4p0__7_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:9]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v8
-; GFX942-NEXT: v_mov_b32_e32 v3, v9
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -380,49 +355,43 @@ define void @v_shuffle_v2p0_v4p0__7_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p0_v4p0__7_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v10
-; GFX900-NEXT: v_mov_b32_e32 v1, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2p0_v4p0__7_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v10
-; GFX90A-NEXT: v_mov_b32_e32 v1, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2p0_v4p0__7_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v10
-; GFX942-NEXT: v_mov_b32_e32 v1, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -436,49 +405,43 @@ define void @v_shuffle_v2p0_v4p0__7_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p0_v4p0__7_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, v12
-; GFX900-NEXT: v_mov_b32_e32 v3, v13
-; GFX900-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2p0_v4p0__7_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, v12
-; GFX90A-NEXT: v_mov_b32_e32 v3, v13
-; GFX90A-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2p0_v4p0__7_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v12
-; GFX942-NEXT: v_mov_b32_e32 v3, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -492,49 +455,43 @@ define void @v_shuffle_v2p0_v4p0__7_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p0_v4p0__7_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v14
-; GFX900-NEXT: v_mov_b32_e32 v5, v15
-; GFX900-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2p0_v4p0__7_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v14
-; GFX90A-NEXT: v_mov_b32_e32 v5, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2p0_v4p0__7_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v14
-; GFX942-NEXT: v_mov_b32_e32 v5, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -548,45 +505,40 @@ define void @v_shuffle_v2p0_v4p0__7_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p0_v4p0__7_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v0
+; GFX900-NEXT: v_mov_b32_e32 v9, v1
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2p0_v4p0__7_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v0
+; GFX90A-NEXT: v_mov_b32_e32 v9, v1
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2p0_v4p0__7_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v0
+; GFX942-NEXT: v_mov_b32_e32 v9, v1
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -600,39 +552,40 @@ define void @v_shuffle_v2p0_v4p0__7_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p0_v4p0__7_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v2
+; GFX900-NEXT: v_mov_b32_e32 v9, v3
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2p0_v4p0__7_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v2
+; GFX90A-NEXT: v_mov_b32_e32 v9, v3
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2p0_v4p0__7_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v2
+; GFX942-NEXT: v_mov_b32_e32 v9, v3
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -646,39 +599,40 @@ define void @v_shuffle_v2p0_v4p0__7_6(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p0_v4p0__7_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v4
+; GFX900-NEXT: v_mov_b32_e32 v9, v5
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2p0_v4p0__7_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v4
+; GFX90A-NEXT: v_mov_b32_e32 v9, v5
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2p0_v4p0__7_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v4
+; GFX942-NEXT: v_mov_b32_e32 v9, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -692,39 +646,40 @@ define void @v_shuffle_v2p0_v4p0__7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p0_v4p0__7_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2p0_v4p0__7_7:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2p0_v4p0__7_7:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -738,39 +693,33 @@ define void @v_shuffle_v2p0_v4p0__u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p0_v4p0__u_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2p0_v4p0__u_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2p0_v4p0__u_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v0, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -918,45 +867,40 @@ define void @v_shuffle_v2p0_v4p0__3_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p0_v4p0__3_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v0
+; GFX900-NEXT: v_mov_b32_e32 v9, v1
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2p0_v4p0__3_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v0
+; GFX90A-NEXT: v_mov_b32_e32 v9, v1
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2p0_v4p0__3_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v0
+; GFX942-NEXT: v_mov_b32_e32 v9, v1
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -969,39 +913,33 @@ define void @v_shuffle_v2p0_v4p0__4_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p0_v4p0__4_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2p0_v4p0__4_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2p0_v4p0__4_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v0, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -1294,39 +1232,40 @@ define void @v_shuffle_v2p0_v4p0__3_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p0_v4p0__3_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v2
+; GFX900-NEXT: v_mov_b32_e32 v9, v3
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2p0_v4p0__3_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v2
+; GFX90A-NEXT: v_mov_b32_e32 v9, v3
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2p0_v4p0__3_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v2
+; GFX942-NEXT: v_mov_b32_e32 v9, v3
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -1658,39 +1597,40 @@ define void @v_shuffle_v2p0_v4p0__3_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p0_v4p0__3_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v4
+; GFX900-NEXT: v_mov_b32_e32 v9, v5
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2p0_v4p0__3_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v4
+; GFX90A-NEXT: v_mov_b32_e32 v9, v5
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2p0_v4p0__3_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v4
+; GFX942-NEXT: v_mov_b32_e32 v9, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -2022,39 +1962,40 @@ define void @v_shuffle_v2p0_v4p0__3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p0_v4p0__3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2p0_v4p0__3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2p0_v4p0__3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -2346,39 +2287,33 @@ define void @v_shuffle_v2p0_v4p0__3_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p0_v4p0__3_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2p0_v4p0__3_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v2p0_v4p0__3_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -2706,12 +2641,12 @@ define void @v_shuffle_v2p0_v4p0__3_5(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v10
+; GFX900-NEXT: v_mov_b32_e32 v9, v11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, v6
-; GFX900-NEXT: v_mov_b32_e32 v9, v7
-; GFX900-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2722,12 +2657,12 @@ define void @v_shuffle_v2p0_v4p0__3_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v16, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v10
+; GFX90A-NEXT: v_mov_b32_e32 v9, v11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, v6
-; GFX90A-NEXT: v_mov_b32_e32 v9, v7
-; GFX90A-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2738,13 +2673,12 @@ define void @v_shuffle_v2p0_v4p0__3_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v10
+; GFX942-NEXT: v_mov_b32_e32 v9, v11
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v6
-; GFX942-NEXT: v_mov_b32_e32 v9, v7
-; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -3102,12 +3036,12 @@ define void @v_shuffle_v2p0_v4p0__3_6(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v12
+; GFX900-NEXT: v_mov_b32_e32 v9, v13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, v6
-; GFX900-NEXT: v_mov_b32_e32 v11, v7
-; GFX900-NEXT: global_store_dwordx4 v16, v[10:13], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3118,12 +3052,12 @@ define void @v_shuffle_v2p0_v4p0__3_6(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v16, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v12
+; GFX90A-NEXT: v_mov_b32_e32 v9, v13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, v6
-; GFX90A-NEXT: v_mov_b32_e32 v11, v7
-; GFX90A-NEXT: global_store_dwordx4 v16, v[10:13], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3134,13 +3068,12 @@ define void @v_shuffle_v2p0_v4p0__3_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v12
+; GFX942-NEXT: v_mov_b32_e32 v9, v13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v6
-; GFX942-NEXT: v_mov_b32_e32 v11, v7
-; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -3498,12 +3431,12 @@ define void @v_shuffle_v2p0_v4p0__3_7(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v14
+; GFX900-NEXT: v_mov_b32_e32 v9, v15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, v6
-; GFX900-NEXT: v_mov_b32_e32 v13, v7
-; GFX900-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3514,12 +3447,12 @@ define void @v_shuffle_v2p0_v4p0__3_7(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v16, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v14
+; GFX90A-NEXT: v_mov_b32_e32 v9, v15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, v6
-; GFX90A-NEXT: v_mov_b32_e32 v13, v7
-; GFX90A-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3530,13 +3463,12 @@ define void @v_shuffle_v2p0_v4p0__3_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v14
+; GFX942-NEXT: v_mov_b32_e32 v9, v15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v6
-; GFX942-NEXT: v_mov_b32_e32 v13, v7
-; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2p3.v4p3.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2p3.v4p3.ll
index a9085502c7358..c35361721e9b0 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2p3.v4p3.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2p3.v4p3.ll
@@ -138,12 +138,11 @@ define void @v_shuffle_v2p3_v4p3__3_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p3_v4p3__3_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v4, v[3:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -273,12 +272,11 @@ define void @v_shuffle_v2p3_v4p3__7_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p3_v4p3__7_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v4, v[3:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -316,16 +314,14 @@ define void @v_shuffle_v2p3_v4p3__7_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p3_v4p3__7_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: global_store_dwordx2 v5, v[1:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v8, v[3:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -370,15 +366,14 @@ define void @v_shuffle_v2p3_v4p3__7_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p3_v4p3__7_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v7, v[3:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -423,15 +418,14 @@ define void @v_shuffle_v2p3_v4p3__7_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p3_v4p3__7_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: global_store_dwordx2 v7, v[1:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v6, v[3:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -476,15 +470,14 @@ define void @v_shuffle_v2p3_v4p3__7_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p3_v4p3__7_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v5, v[3:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -529,13 +522,12 @@ define void @v_shuffle_v2p3_v4p3__7_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p3_v4p3__7_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: global_store_dwordx2 v4, v[1:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx2 v5, v[3:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -575,12 +567,12 @@ define void @v_shuffle_v2p3_v4p3__7_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p3_v4p3__7_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx2 v5, v[3:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -618,12 +610,12 @@ define void @v_shuffle_v2p3_v4p3__7_6(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p3_v4p3__7_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx2 v4, v[1:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx2 v5, v[3:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -661,12 +653,12 @@ define void @v_shuffle_v2p3_v4p3__7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p3_v4p3__7_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx2 v5, v[3:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -704,12 +696,11 @@ define void @v_shuffle_v2p3_v4p3__u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p3_v4p3__u_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v0
-; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v0, v[0:1], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -872,13 +863,12 @@ define void @v_shuffle_v2p3_v4p3__3_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p3_v4p3__3_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: global_store_dwordx2 v4, v[1:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx2 v5, v[3:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -917,12 +907,11 @@ define void @v_shuffle_v2p3_v4p3__4_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p3_v4p3__4_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v0
-; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v0, v[0:1], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1227,12 +1216,12 @@ define void @v_shuffle_v2p3_v4p3__3_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p3_v4p3__3_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx2 v5, v[3:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1582,12 +1571,12 @@ define void @v_shuffle_v2p3_v4p3__3_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p3_v4p3__3_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx2 v4, v[1:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx2 v5, v[3:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1933,12 +1922,12 @@ define void @v_shuffle_v2p3_v4p3__3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p3_v4p3__3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx2 v5, v[3:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2250,12 +2239,11 @@ define void @v_shuffle_v2p3_v4p3__3_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p3_v4p3__3_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v4, v[3:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2592,11 +2580,11 @@ define void @v_shuffle_v2p3_v4p3__3_5(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v3
-; GFX900-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v8, v[3:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2972,11 +2960,11 @@ define void @v_shuffle_v2p3_v4p3__3_6(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v6
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: global_store_dwordx2 v8, v[5:6], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v8, v[3:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3354,11 +3342,11 @@ define void @v_shuffle_v2p3_v4p3__3_7(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, v3
-; GFX900-NEXT: global_store_dwordx2 v8, v[6:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v8, v[3:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2p3.v8p3.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2p3.v8p3.ll
index 9174e92cd9c82..86ec2bc9816bc 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2p3.v8p3.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2p3.v8p3.ll
@@ -298,12 +298,11 @@ define void @v_shuffle_v2p3_v8p3__7_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p3_v8p3__7_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v8, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -597,12 +596,11 @@ define void @v_shuffle_v2p3_v8p3__15_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p3_v8p3__15_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v8, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -640,16 +638,14 @@ define void @v_shuffle_v2p3_v8p3__15_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p3_v8p3__15_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:8]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v9, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v8
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: global_store_dwordx2 v9, v[1:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v16, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -694,15 +690,14 @@ define void @v_shuffle_v2p3_v8p3__15_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p3_v8p3__15_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v15, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[7:14]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:9]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v9
-; GFX900-NEXT: global_store_dwordx2 v10, v[0:1], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v15, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -747,15 +742,14 @@ define void @v_shuffle_v2p3_v8p3__15_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p3_v8p3__15_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v11, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:10]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v1, v10
-; GFX900-NEXT: global_store_dwordx2 v11, v[1:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v14, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -800,15 +794,14 @@ define void @v_shuffle_v2p3_v8p3__15_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p3_v8p3__15_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v13, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[5:12]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, v11
-; GFX900-NEXT: global_store_dwordx2 v12, v[2:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v13, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -853,15 +846,14 @@ define void @v_shuffle_v2p3_v8p3__15_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p3_v8p3__15_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v13, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[5:12]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, v12
-; GFX900-NEXT: global_store_dwordx2 v13, v[3:4], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v12, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -906,15 +898,14 @@ define void @v_shuffle_v2p3_v8p3__15_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p3_v8p3__15_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v11, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[3:10]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v13
-; GFX900-NEXT: global_store_dwordx2 v14, v[4:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v11, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -959,15 +950,14 @@ define void @v_shuffle_v2p3_v8p3__15_6(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p3_v8p3__15_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v15, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[7:14]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, v14
-; GFX900-NEXT: global_store_dwordx2 v15, v[5:6], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v10, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1012,15 +1002,14 @@ define void @v_shuffle_v2p3_v8p3__15_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p3_v8p3__15_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[1:8]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, v15
-; GFX900-NEXT: global_store_dwordx2 v16, v[6:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v9, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1065,13 +1054,12 @@ define void @v_shuffle_v2p3_v8p3__15_8(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p3_v8p3__15_8:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: global_store_dwordx2 v8, v[1:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v0
+; GFX900-NEXT: global_store_dwordx2 v9, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1111,12 +1099,12 @@ define void @v_shuffle_v2p3_v8p3__15_9(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p3_v8p3__15_9:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v1
+; GFX900-NEXT: global_store_dwordx2 v9, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1154,12 +1142,12 @@ define void @v_shuffle_v2p3_v8p3__15_10(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p3_v8p3__15_10:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[1:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v2
+; GFX900-NEXT: global_store_dwordx2 v9, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1199,12 +1187,12 @@ define void @v_shuffle_v2p3_v8p3__15_11(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p3_v8p3__15_11:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v3
+; GFX900-NEXT: global_store_dwordx2 v9, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1242,12 +1230,12 @@ define void @v_shuffle_v2p3_v8p3__15_12(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p3_v8p3__15_12:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[3:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v4
+; GFX900-NEXT: global_store_dwordx2 v9, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1287,12 +1275,12 @@ define void @v_shuffle_v2p3_v8p3__15_13(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p3_v8p3__15_13:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v5
+; GFX900-NEXT: global_store_dwordx2 v9, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1330,12 +1318,12 @@ define void @v_shuffle_v2p3_v8p3__15_14(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p3_v8p3__15_14:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[5:6], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: global_store_dwordx2 v9, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1373,12 +1361,12 @@ define void @v_shuffle_v2p3_v8p3__15_15(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p3_v8p3__15_15:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[6:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v7
+; GFX900-NEXT: global_store_dwordx2 v9, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1416,12 +1404,11 @@ define void @v_shuffle_v2p3_v8p3__u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p3_v8p3__u_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[1:8]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v0
-; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v0, v[0:1], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1756,13 +1743,12 @@ define void @v_shuffle_v2p3_v8p3__7_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p3_v8p3__7_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: global_store_dwordx2 v8, v[1:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v0
+; GFX900-NEXT: global_store_dwordx2 v9, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1801,12 +1787,11 @@ define void @v_shuffle_v2p3_v8p3__8_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p3_v8p3__8_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[1:8]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v0
-; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v0, v[0:1], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2491,12 +2476,12 @@ define void @v_shuffle_v2p3_v8p3__7_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p3_v8p3__7_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v1
+; GFX900-NEXT: global_store_dwordx2 v9, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3228,12 +3213,12 @@ define void @v_shuffle_v2p3_v8p3__7_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p3_v8p3__7_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[1:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v2
+; GFX900-NEXT: global_store_dwordx2 v9, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3961,12 +3946,12 @@ define void @v_shuffle_v2p3_v8p3__7_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p3_v8p3__7_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v3
+; GFX900-NEXT: global_store_dwordx2 v9, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4698,12 +4683,12 @@ define void @v_shuffle_v2p3_v8p3__7_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p3_v8p3__7_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[3:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v4
+; GFX900-NEXT: global_store_dwordx2 v9, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5431,12 +5416,12 @@ define void @v_shuffle_v2p3_v8p3__7_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p3_v8p3__7_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v5
+; GFX900-NEXT: global_store_dwordx2 v9, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6170,12 +6155,12 @@ define void @v_shuffle_v2p3_v8p3__7_6(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p3_v8p3__7_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[5:6], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: global_store_dwordx2 v9, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6901,12 +6886,12 @@ define void @v_shuffle_v2p3_v8p3__7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p3_v8p3__7_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[6:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v7
+; GFX900-NEXT: global_store_dwordx2 v9, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7590,12 +7575,11 @@ define void @v_shuffle_v2p3_v8p3__7_8(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2p3_v8p3__7_8:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v8, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -8320,11 +8304,11 @@ define void @v_shuffle_v2p3_v8p3__7_9(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v9
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, v7
-; GFX900-NEXT: global_store_dwordx2 v16, v[8:9], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v16, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -9084,11 +9068,11 @@ define void @v_shuffle_v2p3_v8p3__7_10(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v10
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v9, v7
-; GFX900-NEXT: global_store_dwordx2 v16, v[9:10], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v16, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -9852,11 +9836,11 @@ define void @v_shuffle_v2p3_v8p3__7_11(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, v7
-; GFX900-NEXT: global_store_dwordx2 v16, v[10:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v16, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -10616,11 +10600,11 @@ define void @v_shuffle_v2p3_v8p3__7_12(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v12
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v11, v7
-; GFX900-NEXT: global_store_dwordx2 v16, v[11:12], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v16, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -11384,11 +11368,11 @@ define void @v_shuffle_v2p3_v8p3__7_13(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, v7
-; GFX900-NEXT: global_store_dwordx2 v16, v[12:13], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v16, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -12148,11 +12132,11 @@ define void @v_shuffle_v2p3_v8p3__7_14(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v14
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v13, v7
-; GFX900-NEXT: global_store_dwordx2 v16, v[13:14], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v16, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -12918,11 +12902,11 @@ define void @v_shuffle_v2p3_v8p3__7_15(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, v7
-; GFX900-NEXT: global_store_dwordx2 v16, v[14:15], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v16, v[7:8], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3f32.v2f32.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3f32.v2f32.ll
index 16202a708fd5c..d38b17c04947b 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3f32.v2f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3f32.v2f32.ll
@@ -776,15 +776,14 @@ define void @v_shuffle_v3f32_v2f32__3_u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v2f32__3_u_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v2, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[3:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v2, v[1:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3f32.v3f32.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3f32.v3f32.ll
index 131204c8a6430..4032d31cbb041 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3f32.v3f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3f32.v3f32.ll
@@ -99,36 +99,33 @@ define void @v_shuffle_v3f32_v3f32__2_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v3f32__2_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v3, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3f32_v3f32__2_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v3, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v3f32__2_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: global_store_dwordx3 v3, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -194,36 +191,33 @@ define void @v_shuffle_v3f32_v3f32__5_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v3f32__5_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v3, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3f32_v3f32__5_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v3, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: global_store_dwordx3 v3, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -237,48 +231,45 @@ define void @v_shuffle_v3f32_v3f32__5_0_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v3f32__5_0_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: global_store_dwordx3 v5, v[0:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3f32_v3f32__5_0_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v0
-; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_0_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v0
-; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: global_store_dwordx3 v7, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -292,46 +283,43 @@ define void @v_shuffle_v3f32_v3f32__5_1_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v3f32__5_1_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
-; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: global_store_dwordx3 v5, v[0:2], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3f32_v3f32__5_1_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
-; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: global_store_dwordx3 v5, v[0:2], s[16:17]
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_1_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
-; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -345,15 +333,14 @@ define void @v_shuffle_v3f32_v3f32__5_2_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v3f32__5_2_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[1:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx3 v6, v[1:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v4, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -361,15 +348,14 @@ define void @v_shuffle_v3f32_v3f32__5_2_u(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -377,15 +363,14 @@ define void @v_shuffle_v3f32_v3f32__5_2_u(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -401,37 +386,35 @@ define void @v_shuffle_v3f32_v3f32__5_3_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: global_store_dwordx3 v4, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3f32_v3f32__5_3_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: global_store_dwordx3 v3, v[4:6], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: global_store_dwordx3 v4, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_3_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: global_store_dwordx3 v4, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -445,36 +428,37 @@ define void @v_shuffle_v3f32_v3f32__5_4_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v3f32__5_4_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: global_store_dwordx3 v4, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3f32_v3f32__5_4_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: global_store_dwordx3 v4, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_4_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: global_store_dwordx3 v4, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -488,38 +472,37 @@ define void @v_shuffle_v3f32_v3f32__5_5_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v3f32__5_5_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: global_store_dwordx3 v3, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: global_store_dwordx3 v4, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3f32_v3f32__5_5_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx3 v4, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_5_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx3 v4, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -533,50 +516,46 @@ define void @v_shuffle_v3f32_v3f32__5_5_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v3f32__5_5_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx3 v6, v[1:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3f32_v3f32__5_5_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_5_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx3 v7, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -591,14 +570,14 @@ define void @v_shuffle_v3f32_v3f32__5_5_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: global_store_dwordx3 v5, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -606,16 +585,15 @@ define void @v_shuffle_v3f32_v3f32__5_5_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: v_mov_b32_e32 v4, v1
-; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -623,17 +601,15 @@ define void @v_shuffle_v3f32_v3f32__5_5_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: v_mov_b32_e32 v4, v1
-; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx3 v7, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -648,14 +624,14 @@ define void @v_shuffle_v3f32_v3f32__5_5_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -663,15 +639,14 @@ define void @v_shuffle_v3f32_v3f32__5_5_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v6
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -679,16 +654,15 @@ define void @v_shuffle_v3f32_v3f32__5_5_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v6
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -702,42 +676,40 @@ define void @v_shuffle_v3f32_v3f32__5_5_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v3f32__5_5_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3f32_v3f32__5_5_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v2
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: global_store_dwordx3 v3, v[4:6], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_5_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v2
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -751,42 +723,40 @@ define void @v_shuffle_v3f32_v3f32__5_5_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v3f32__5_5_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3f32_v3f32__5_5_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v2
-; GFX90A-NEXT: v_mov_b32_e32 v6, v1
-; GFX90A-NEXT: global_store_dwordx3 v3, v[4:6], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_5_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v2
-; GFX942-NEXT: v_mov_b32_e32 v6, v1
-; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -800,39 +770,40 @@ define void @v_shuffle_v3f32_v3f32__5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v3f32__5_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3f32_v3f32__5_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -858,26 +829,25 @@ define void @v_shuffle_v3f32_v3f32__u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-LABEL: v_shuffle_v3f32_v3f32__u_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v1, v2
+; GFX90A-NEXT: global_store_dwordx3 v0, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v3f32__u_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v1, v2
+; GFX942-NEXT: global_store_dwordx3 v0, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -948,29 +918,27 @@ define void @v_shuffle_v3f32_v3f32__1_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-LABEL: v_shuffle_v3f32_v3f32__1_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v1
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: global_store_dwordx3 v3, v[4:6], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v3
+; GFX90A-NEXT: v_mov_b32_e32 v1, v2
+; GFX90A-NEXT: global_store_dwordx3 v5, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v3f32__1_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v1
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v3
+; GFX942-NEXT: v_mov_b32_e32 v1, v2
+; GFX942-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -983,42 +951,40 @@ define void @v_shuffle_v3f32_v3f32__2_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v3f32__2_0_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3f32_v3f32__2_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: global_store_dwordx3 v3, v[4:6], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v3f32__2_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -1043,26 +1009,25 @@ define void @v_shuffle_v3f32_v3f32__3_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-LABEL: v_shuffle_v3f32_v3f32__3_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v1, v2
+; GFX90A-NEXT: global_store_dwordx3 v0, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v3f32__3_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v1, v2
+; GFX942-NEXT: global_store_dwordx3 v0, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -1091,16 +1056,15 @@ define void @v_shuffle_v3f32_v3f32__4_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: v_mov_b32_e32 v3, v0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: v_mov_b32_e32 v1, v2
+; GFX90A-NEXT: global_store_dwordx3 v7, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1108,17 +1072,15 @@ define void @v_shuffle_v3f32_v3f32__4_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: v_mov_b32_e32 v3, v0
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v1, v2
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: global_store_dwordx3 v7, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -1133,50 +1095,45 @@ define void @v_shuffle_v3f32_v3f32__5_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: global_store_dwordx3 v5, v[0:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3f32_v3f32__5_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v0
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: global_store_dwordx3 v7, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -1190,49 +1147,42 @@ define void @v_shuffle_v3f32_v3f32__5_u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v3f32__5_u_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v3, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3f32_v3f32__5_u_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v3, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_u_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
+; GFX942-NEXT: global_store_dwordx3 v3, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -1246,52 +1196,46 @@ define void @v_shuffle_v3f32_v3f32__5_1_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v3f32__5_1_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v3, v5
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3f32_v3f32__5_1_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v3, v5
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_1_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: global_store_dwordx3 v7, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -1305,50 +1249,46 @@ define void @v_shuffle_v3f32_v3f32__5_2_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v3f32__5_2_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v3, v6
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx3 v6, v[1:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3f32_v3f32__5_2_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v6
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v2
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: global_store_dwordx3 v3, v[4:6], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_2_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v2
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v6
+; GFX942-NEXT: global_store_dwordx3 v7, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -1362,50 +1302,46 @@ define void @v_shuffle_v3f32_v3f32__5_3_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v3f32__5_3_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: global_store_dwordx3 v7, v[0:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3f32_v3f32__5_3_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v2
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: global_store_dwordx3 v5, v[6:8], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_3_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v2
-; GFX942-NEXT: global_store_dwordx3 v5, v[6:8], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: global_store_dwordx3 v7, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -1419,49 +1355,46 @@ define void @v_shuffle_v3f32_v3f32__5_4_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v3f32__5_4_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3f32_v3f32__5_4_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v3, v1
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_4_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: global_store_dwordx3 v7, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -1604,39 +1537,40 @@ define void @v_shuffle_v3f32_v3f32__2_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v3f32__2_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3f32_v3f32__2_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v3f32__2_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -1747,15 +1681,14 @@ define void @v_shuffle_v3f32_v3f32__5_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: global_store_dwordx3 v5, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1763,15 +1696,14 @@ define void @v_shuffle_v3f32_v3f32__5_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: global_store_dwordx3 v5, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1779,16 +1711,14 @@ define void @v_shuffle_v3f32_v3f32__5_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1]
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -1802,15 +1732,14 @@ define void @v_shuffle_v3f32_v3f32__5_u_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v3f32__5_u_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
-; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1818,15 +1747,14 @@ define void @v_shuffle_v3f32_v3f32__5_u_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: global_store_dwordx3 v5, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1834,16 +1762,14 @@ define void @v_shuffle_v3f32_v3f32__5_u_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1]
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -1857,15 +1783,14 @@ define void @v_shuffle_v3f32_v3f32__5_0_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v3f32__5_0_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
-; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1873,16 +1798,15 @@ define void @v_shuffle_v3f32_v3f32__5_0_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v1
-; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1890,17 +1814,15 @@ define void @v_shuffle_v3f32_v3f32__5_0_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v0
-; GFX942-NEXT: v_mov_b32_e32 v4, v1
-; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
+; GFX942-NEXT: global_store_dwordx3 v7, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -1915,15 +1837,14 @@ define void @v_shuffle_v3f32_v3f32__5_2_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v5
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:6]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx3 v7, v[0:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1933,14 +1854,13 @@ define void @v_shuffle_v3f32_v3f32__5_2_1(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v6
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v2
-; GFX90A-NEXT: v_mov_b32_e32 v6, v1
-; GFX90A-NEXT: global_store_dwordx3 v3, v[4:6], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1950,14 +1870,13 @@ define void @v_shuffle_v3f32_v3f32__5_2_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v6
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v2
-; GFX942-NEXT: v_mov_b32_e32 v6, v1
-; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1]
+; GFX942-NEXT: global_store_dwordx3 v7, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -1974,13 +1893,12 @@ define void @v_shuffle_v3f32_v3f32__5_3_1(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1988,16 +1906,15 @@ define void @v_shuffle_v3f32_v3f32__5_3_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v2
-; GFX90A-NEXT: v_mov_b32_e32 v8, v1
-; GFX90A-NEXT: global_store_dwordx3 v5, v[6:8], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2005,16 +1922,15 @@ define void @v_shuffle_v3f32_v3f32__5_3_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, v1
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v2
-; GFX942-NEXT: global_store_dwordx3 v5, v[6:8], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: global_store_dwordx3 v7, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -2029,15 +1945,14 @@ define void @v_shuffle_v3f32_v3f32__5_4_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v4, v1
-; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2045,15 +1960,15 @@ define void @v_shuffle_v3f32_v3f32__5_4_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v4, v1
-; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2061,16 +1976,15 @@ define void @v_shuffle_v3f32_v3f32__5_4_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v4, v1
-; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
+; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: global_store_dwordx3 v7, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -2212,39 +2126,40 @@ define void @v_shuffle_v3f32_v3f32__2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v3f32__2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3f32_v3f32__2_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v3f32__2_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -2353,16 +2268,15 @@ define void @v_shuffle_v3f32_v3f32__5_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v3f32__5_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[1:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx3 v6, v[1:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2370,15 +2284,14 @@ define void @v_shuffle_v3f32_v3f32__5_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2386,15 +2299,14 @@ define void @v_shuffle_v3f32_v3f32__5_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -2408,46 +2320,43 @@ define void @v_shuffle_v3f32_v3f32__5_u_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v3f32__5_u_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3f32_v3f32__5_u_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_u_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -2462,15 +2371,14 @@ define void @v_shuffle_v3f32_v3f32__5_0_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v5
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:6]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx3 v7, v[0:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2480,14 +2388,13 @@ define void @v_shuffle_v3f32_v3f32__5_0_2(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_mov_b32_e32 v4, v6
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: global_store_dwordx3 v3, v[4:6], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2497,14 +2404,13 @@ define void @v_shuffle_v3f32_v3f32__5_0_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_mov_b32_e32 v4, v6
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1]
+; GFX942-NEXT: global_store_dwordx3 v7, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -2518,46 +2424,43 @@ define void @v_shuffle_v3f32_v3f32__5_1_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v3f32__5_1_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3f32_v3f32__5_1_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_1_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -2574,13 +2477,12 @@ define void @v_shuffle_v3f32_v3f32__5_3_2(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: global_store_dwordx3 v5, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2588,15 +2490,14 @@ define void @v_shuffle_v3f32_v3f32__5_3_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v4
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2604,16 +2505,15 @@ define void @v_shuffle_v3f32_v3f32__5_3_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v4
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -2628,15 +2528,14 @@ define void @v_shuffle_v3f32_v3f32__5_4_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v5, v2
-; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2644,15 +2543,14 @@ define void @v_shuffle_v3f32_v3f32__5_4_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: global_store_dwordx3 v3, v[4:6], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2660,15 +2558,15 @@ define void @v_shuffle_v3f32_v3f32__5_4_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -2773,36 +2671,33 @@ define void @v_shuffle_v3f32_v3f32__2_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v3f32__2_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v3, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3f32_v3f32__2_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v3, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v3f32__2_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: global_store_dwordx3 v3, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -2839,29 +2734,27 @@ define void @v_shuffle_v3f32_v3f32__4_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-LABEL: v_shuffle_v3f32_v3f32__4_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v1
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: global_store_dwordx3 v3, v[4:6], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v3
+; GFX90A-NEXT: v_mov_b32_e32 v1, v2
+; GFX90A-NEXT: global_store_dwordx3 v5, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v3f32__4_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v1
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v3
+; GFX942-NEXT: v_mov_b32_e32 v1, v2
+; GFX942-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -2875,42 +2768,40 @@ define void @v_shuffle_v3f32_v3f32__5_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v3f32__5_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3f32_v3f32__5_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: global_store_dwordx3 v3, v[4:6], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -2924,13 +2815,12 @@ define void @v_shuffle_v3f32_v3f32__5_u_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v3f32__5_u_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx3 v3, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2970,14 +2860,14 @@ define void @v_shuffle_v3f32_v3f32__5_0_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: global_store_dwordx3 v5, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2985,16 +2875,15 @@ define void @v_shuffle_v3f32_v3f32__5_0_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v2
-; GFX90A-NEXT: global_store_dwordx3 v5, v[6:8], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3002,16 +2891,15 @@ define void @v_shuffle_v3f32_v3f32__5_0_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v8, v2
-; GFX942-NEXT: global_store_dwordx3 v5, v[6:8], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: global_store_dwordx3 v7, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -3026,14 +2914,14 @@ define void @v_shuffle_v3f32_v3f32__5_1_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: global_store_dwordx3 v5, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3041,14 +2929,14 @@ define void @v_shuffle_v3f32_v3f32__5_1_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: global_store_dwordx3 v5, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3056,15 +2944,15 @@ define void @v_shuffle_v3f32_v3f32__5_1_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -3079,14 +2967,14 @@ define void @v_shuffle_v3f32_v3f32__5_2_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[1:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx3 v6, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3094,16 +2982,15 @@ define void @v_shuffle_v3f32_v3f32__5_2_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3111,16 +2998,15 @@ define void @v_shuffle_v3f32_v3f32__5_2_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -3147,29 +3033,27 @@ define void @v_shuffle_v3f32_v3f32__5_4_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-LABEL: v_shuffle_v3f32_v3f32__5_4_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: global_store_dwordx3 v3, v[4:6], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_4_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -3340,12 +3224,11 @@ define void @v_shuffle_v3f32_v3f32__2_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: v_mov_b32_e32 v5, v4
-; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3355,13 +3238,13 @@ define void @v_shuffle_v3f32_v3f32__2_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v5
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v6, v5
-; GFX90A-NEXT: global_store_dwordx3 v3, v[4:6], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3371,13 +3254,13 @@ define void @v_shuffle_v3f32_v3f32__2_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1]
+; GFX942-NEXT: global_store_dwordx3 v7, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -3480,39 +3363,40 @@ define void @v_shuffle_v3f32_v3f32__5_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v3f32__5_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3f32_v3f32__5_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -3526,38 +3410,37 @@ define void @v_shuffle_v3f32_v3f32__5_u_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v3f32__5_u_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx3 v3, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3f32_v3f32__5_u_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: global_store_dwordx3 v3, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_u_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: global_store_dwordx3 v3, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -3572,15 +3455,14 @@ define void @v_shuffle_v3f32_v3f32__5_0_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx3 v5, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3588,16 +3470,15 @@ define void @v_shuffle_v3f32_v3f32__5_0_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v3
-; GFX90A-NEXT: global_store_dwordx3 v5, v[6:8], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3605,16 +3486,15 @@ define void @v_shuffle_v3f32_v3f32__5_0_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v8, v3
-; GFX942-NEXT: global_store_dwordx3 v5, v[6:8], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: global_store_dwordx3 v7, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -3629,15 +3509,14 @@ define void @v_shuffle_v3f32_v3f32__5_1_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx3 v5, v[0:2], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3645,15 +3524,14 @@ define void @v_shuffle_v3f32_v3f32__5_1_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx3 v5, v[0:2], s[16:17]
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3661,16 +3539,15 @@ define void @v_shuffle_v3f32_v3f32__5_1_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -3685,15 +3562,14 @@ define void @v_shuffle_v3f32_v3f32__5_2_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[1:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx3 v6, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3701,16 +3577,15 @@ define void @v_shuffle_v3f32_v3f32__5_2_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3718,16 +3593,15 @@ define void @v_shuffle_v3f32_v3f32__5_2_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -3741,41 +3615,40 @@ define void @v_shuffle_v3f32_v3f32__5_3_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v3f32__5_3_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3f32_v3f32__5_3_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v1
-; GFX90A-NEXT: global_store_dwordx3 v3, v[4:6], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_3_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v1
-; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -3945,15 +3818,15 @@ define void @v_shuffle_v3f32_v3f32__2_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v5
+; GFX900-NEXT: v_mov_b32_e32 v4, v5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3963,13 +3836,13 @@ define void @v_shuffle_v3f32_v3f32__2_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v6
+; GFX90A-NEXT: v_mov_b32_e32 v4, v6
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v6
-; GFX90A-NEXT: global_store_dwordx3 v3, v[4:6], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3979,13 +3852,13 @@ define void @v_shuffle_v3f32_v3f32__2_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v6
+; GFX942-NEXT: v_mov_b32_e32 v4, v6
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, v6
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1]
+; GFX942-NEXT: global_store_dwordx3 v7, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -4087,36 +3960,37 @@ define void @v_shuffle_v3f32_v3f32__5_u_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v3f32__5_u_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx3 v3, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3f32_v3f32__5_u_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: global_store_dwordx3 v3, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_u_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: global_store_dwordx3 v3, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -4131,15 +4005,14 @@ define void @v_shuffle_v3f32_v3f32__5_0_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: global_store_dwordx3 v5, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4147,15 +4020,15 @@ define void @v_shuffle_v3f32_v3f32__5_0_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v0
-; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4163,16 +4036,15 @@ define void @v_shuffle_v3f32_v3f32__5_0_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v0
-; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: global_store_dwordx3 v7, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -4187,15 +4059,14 @@ define void @v_shuffle_v3f32_v3f32__5_1_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: global_store_dwordx3 v5, v[0:2], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4203,15 +4074,14 @@ define void @v_shuffle_v3f32_v3f32__5_1_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: global_store_dwordx3 v5, v[0:2], s[16:17]
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4219,16 +4089,15 @@ define void @v_shuffle_v3f32_v3f32__5_1_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -4243,15 +4112,14 @@ define void @v_shuffle_v3f32_v3f32__5_2_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[1:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx3 v6, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4259,15 +4127,15 @@ define void @v_shuffle_v3f32_v3f32__5_2_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v2
-; GFX90A-NEXT: global_store_dwordx3 v3, v[4:6], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4275,15 +4143,15 @@ define void @v_shuffle_v3f32_v3f32__5_2_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v2
-; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -4297,42 +4165,40 @@ define void @v_shuffle_v3f32_v3f32__5_3_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v3f32__5_3_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3f32_v3f32__5_3_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: global_store_dwordx3 v3, v[4:6], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_3_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1]
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -4346,36 +4212,40 @@ define void @v_shuffle_v3f32_v3f32__5_4_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v3f32__5_4_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3f32_v3f32__5_4_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v3f32__5_4_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3f32.v4f32.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3f32.v4f32.ll
index c5a08f098b4c6..2e4131d378906 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3f32.v4f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3f32.v4f32.ll
@@ -99,36 +99,33 @@ define void @v_shuffle_v3f32_v4f32__2_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v4f32__2_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v4, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3f32_v4f32__2_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v4, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v4f32__2_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: global_store_dwordx3 v4, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -141,12 +138,11 @@ define void @v_shuffle_v3f32_v4f32__3_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v4f32__3_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v4, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -236,36 +232,33 @@ define void @v_shuffle_v3f32_v4f32__6_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v4f32__6_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v4, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3f32_v4f32__6_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v4, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v4f32__6_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: global_store_dwordx3 v4, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -279,12 +272,11 @@ define void @v_shuffle_v3f32_v4f32__7_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v4f32__7_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v4, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -322,16 +314,14 @@ define void @v_shuffle_v3f32_v4f32__7_0_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v4f32__7_0_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: global_store_dwordx3 v5, v[1:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v8, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -378,15 +368,14 @@ define void @v_shuffle_v3f32_v4f32__7_1_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v4f32__7_1_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v7, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -431,15 +420,14 @@ define void @v_shuffle_v3f32_v4f32__7_2_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v4f32__7_2_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: global_store_dwordx3 v7, v[1:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -485,16 +473,14 @@ define void @v_shuffle_v3f32_v4f32__7_3_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v4f32__7_3_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v5, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -508,9 +494,8 @@ define void @v_shuffle_v3f32_v4f32__7_3_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v7
+; GFX90A-NEXT: global_store_dwordx3 v8, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -524,9 +509,9 @@ define void @v_shuffle_v3f32_v4f32__7_3_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v7
+; GFX942-NEXT: global_store_dwordx3 v8, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -540,13 +525,12 @@ define void @v_shuffle_v3f32_v4f32__7_4_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v4f32__7_4_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx3 v5, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -586,12 +570,12 @@ define void @v_shuffle_v3f32_v4f32__7_5_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v4f32__7_5_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx3 v5, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -629,12 +613,12 @@ define void @v_shuffle_v3f32_v4f32__7_6_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v4f32__7_6_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx3 v5, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -674,13 +658,12 @@ define void @v_shuffle_v3f32_v4f32__7_7_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v4f32__7_7_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx3 v5, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -691,9 +674,8 @@ define void @v_shuffle_v3f32_v4f32__7_7_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v3
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx3 v4, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -704,9 +686,8 @@ define void @v_shuffle_v3f32_v4f32__7_7_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v3
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx3 v4, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -720,17 +701,15 @@ define void @v_shuffle_v3f32_v4f32__7_7_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v4f32__7_7_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx3 v5, v[1:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v9, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -740,14 +719,12 @@ define void @v_shuffle_v3f32_v4f32__7_7_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v8, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -757,15 +734,12 @@ define void @v_shuffle_v3f32_v4f32__7_7_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1]
+; GFX942-NEXT: global_store_dwordx3 v8, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -780,16 +754,14 @@ define void @v_shuffle_v3f32_v4f32__7_7_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v1
-; GFX900-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx3 v8, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -797,16 +769,15 @@ define void @v_shuffle_v3f32_v4f32__7_7_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v1
-; GFX90A-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: global_store_dwordx3 v8, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -814,17 +785,15 @@ define void @v_shuffle_v3f32_v4f32__7_7_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v1
-; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
+; GFX942-NEXT: global_store_dwordx3 v8, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -839,15 +808,14 @@ define void @v_shuffle_v3f32_v4f32__7_7_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: global_store_dwordx3 v7, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx3 v7, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -855,15 +823,14 @@ define void @v_shuffle_v3f32_v4f32__7_7_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -871,16 +838,15 @@ define void @v_shuffle_v3f32_v4f32__7_7_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -895,15 +861,14 @@ define void @v_shuffle_v3f32_v4f32__7_7_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v7
-; GFX900-NEXT: global_store_dwordx3 v8, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -911,16 +876,15 @@ define void @v_shuffle_v3f32_v4f32__7_7_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -928,16 +892,15 @@ define void @v_shuffle_v3f32_v4f32__7_7_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1]
+; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -951,43 +914,39 @@ define void @v_shuffle_v3f32_v4f32__7_7_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v4f32__7_7_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3f32_v4f32__7_7_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v3
-; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: global_store_dwordx3 v4, v[6:8], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_7_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v3
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: global_store_dwordx3 v4, v[6:8], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -1001,42 +960,39 @@ define void @v_shuffle_v3f32_v4f32__7_7_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v4f32__7_7_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v4, v1
-; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3f32_v4f32__7_7_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v3
-; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: v_mov_b32_e32 v8, v1
-; GFX90A-NEXT: global_store_dwordx3 v4, v[6:8], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_7_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v3
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: v_mov_b32_e32 v8, v1
-; GFX942-NEXT: global_store_dwordx3 v4, v[6:8], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -1050,13 +1006,13 @@ define void @v_shuffle_v3f32_v4f32__7_7_6(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v4f32__7_7_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1096,13 +1052,13 @@ define void @v_shuffle_v3f32_v4f32__7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v4f32__7_7_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1112,11 +1068,10 @@ define void @v_shuffle_v3f32_v4f32__7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v3
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1126,11 +1081,10 @@ define void @v_shuffle_v3f32_v4f32__7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v3
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -1145,38 +1099,36 @@ define void @v_shuffle_v3f32_v4f32__u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: global_store_dwordx3 v0, v[0:2], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3f32_v4f32__u_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v1, v2
+; GFX90A-NEXT: global_store_dwordx3 v0, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v4f32__u_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v1, v2
+; GFX942-NEXT: global_store_dwordx3 v0, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -1247,29 +1199,27 @@ define void @v_shuffle_v3f32_v4f32__1_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-LABEL: v_shuffle_v3f32_v4f32__1_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v1
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: global_store_dwordx3 v4, v[6:8], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v3
+; GFX90A-NEXT: v_mov_b32_e32 v1, v2
+; GFX90A-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v4f32__1_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v1
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: global_store_dwordx3 v4, v[6:8], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v3
+; GFX942-NEXT: v_mov_b32_e32 v1, v2
+; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -1285,40 +1235,36 @@ define void @v_shuffle_v3f32_v4f32__2_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3f32_v4f32__2_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: global_store_dwordx3 v4, v[6:8], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v4f32__2_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: global_store_dwordx3 v4, v[6:8], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -1331,43 +1277,40 @@ define void @v_shuffle_v3f32_v4f32__3_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v4f32__3_0_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3f32_v4f32__3_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v3
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: global_store_dwordx3 v4, v[6:8], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: v_mov_b32_e32 v1, v2
+; GFX90A-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v4f32__3_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v3
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: global_store_dwordx3 v4, v[6:8], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: v_mov_b32_e32 v1, v2
+; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -1381,38 +1324,36 @@ define void @v_shuffle_v3f32_v4f32__4_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: global_store_dwordx3 v0, v[0:2], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3f32_v4f32__4_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v1, v2
+; GFX90A-NEXT: global_store_dwordx3 v0, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v4f32__4_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v1, v2
+; GFX942-NEXT: global_store_dwordx3 v0, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -1442,16 +1383,15 @@ define void @v_shuffle_v3f32_v4f32__5_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: v_mov_b32_e32 v3, v0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: v_mov_b32_e32 v1, v2
+; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1459,17 +1399,15 @@ define void @v_shuffle_v3f32_v4f32__5_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: v_mov_b32_e32 v3, v0
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v1, v2
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -1486,14 +1424,12 @@ define void @v_shuffle_v3f32_v4f32__6_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx3 v5, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
+; GFX900-NEXT: global_store_dwordx3 v8, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1503,14 +1439,12 @@ define void @v_shuffle_v3f32_v4f32__6_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: global_store_dwordx3 v8, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1520,15 +1454,13 @@ define void @v_shuffle_v3f32_v4f32__6_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v0
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: global_store_dwordx3 v8, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -1543,16 +1475,14 @@ define void @v_shuffle_v3f32_v4f32__7_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx3 v5, v[1:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v8, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1560,16 +1490,15 @@ define void @v_shuffle_v3f32_v4f32__7_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v3, v0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v7
+; GFX90A-NEXT: v_mov_b32_e32 v1, v2
+; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1577,17 +1506,15 @@ define void @v_shuffle_v3f32_v4f32__7_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v3, v0
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v1, v2
+; GFX942-NEXT: v_mov_b32_e32 v0, v7
+; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -1601,16 +1528,14 @@ define void @v_shuffle_v3f32_v4f32__7_u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v4f32__7_u_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx3 v5, v[1:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v4, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1618,15 +1543,14 @@ define void @v_shuffle_v3f32_v4f32__7_u_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v1, 0
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v0, v7
+; GFX90A-NEXT: global_store_dwordx3 v1, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1634,16 +1558,15 @@ define void @v_shuffle_v3f32_v4f32__7_u_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v1, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v7
+; GFX942-NEXT: global_store_dwordx3 v1, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -1657,17 +1580,15 @@ define void @v_shuffle_v3f32_v4f32__7_1_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v4f32__7_1_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v6
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v9, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1675,16 +1596,15 @@ define void @v_shuffle_v3f32_v4f32__7_1_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v7
+; GFX90A-NEXT: v_mov_b32_e32 v1, v3
+; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1692,17 +1612,15 @@ define void @v_shuffle_v3f32_v4f32__7_1_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: v_mov_b32_e32 v0, v7
+; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -1716,50 +1634,48 @@ define void @v_shuffle_v3f32_v4f32__7_2_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v4f32__7_2_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v7
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx3 v7, v[1:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v9, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3f32_v4f32__7_2_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[6:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v7
-; GFX90A-NEXT: v_mov_b32_e32 v5, v2
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: global_store_dwordx3 v8, v[4:6], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v9
+; GFX90A-NEXT: v_mov_b32_e32 v1, v4
+; GFX90A-NEXT: global_store_dwordx3 v10, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_2_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[6:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, v7
-; GFX942-NEXT: v_mov_b32_e32 v5, v2
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v9
+; GFX942-NEXT: v_mov_b32_e32 v1, v4
+; GFX942-NEXT: global_store_dwordx3 v10, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -1773,17 +1689,15 @@ define void @v_shuffle_v3f32_v4f32__7_3_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v4f32__7_3_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v8
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx3 v8, v[1:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v9, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1829,17 +1743,15 @@ define void @v_shuffle_v3f32_v4f32__7_4_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v4f32__7_4_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v9, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1847,16 +1759,15 @@ define void @v_shuffle_v3f32_v4f32__7_4_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, v5
-; GFX90A-NEXT: v_mov_b32_e32 v9, v2
-; GFX90A-NEXT: v_mov_b32_e32 v10, v0
-; GFX90A-NEXT: global_store_dwordx3 v6, v[8:10], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v7
+; GFX90A-NEXT: v_mov_b32_e32 v1, v4
+; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1864,16 +1775,16 @@ define void @v_shuffle_v3f32_v4f32__7_4_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, v0
-; GFX942-NEXT: v_mov_b32_e32 v8, v5
-; GFX942-NEXT: v_mov_b32_e32 v9, v2
-; GFX942-NEXT: global_store_dwordx3 v6, v[8:10], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v7
+; GFX942-NEXT: v_mov_b32_e32 v1, v4
+; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -1887,16 +1798,15 @@ define void @v_shuffle_v3f32_v4f32__7_5_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v4f32__7_5_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx3 v5, v[1:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v9, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1943,16 +1853,15 @@ define void @v_shuffle_v3f32_v4f32__7_6_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v4f32__7_6_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v9, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1960,16 +1869,15 @@ define void @v_shuffle_v3f32_v4f32__7_6_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v7
+; GFX90A-NEXT: v_mov_b32_e32 v1, v6
+; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1977,17 +1885,16 @@ define void @v_shuffle_v3f32_v4f32__7_6_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v7
+; GFX942-NEXT: v_mov_b32_e32 v1, v6
+; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -2133,10 +2040,10 @@ define void @v_shuffle_v3f32_v4f32__2_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2146,10 +2053,10 @@ define void @v_shuffle_v3f32_v4f32__2_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2159,10 +2066,10 @@ define void @v_shuffle_v3f32_v4f32__2_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -2175,13 +2082,13 @@ define void @v_shuffle_v3f32_v4f32__3_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v4f32__3_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2319,15 +2226,14 @@ define void @v_shuffle_v3f32_v4f32__6_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
+; GFX900-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2335,15 +2241,15 @@ define void @v_shuffle_v3f32_v4f32__6_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v5
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: global_store_dwordx3 v8, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2351,16 +2257,15 @@ define void @v_shuffle_v3f32_v4f32__6_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
+; GFX942-NEXT: global_store_dwordx3 v8, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -2375,15 +2280,14 @@ define void @v_shuffle_v3f32_v4f32__7_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v7, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2430,16 +2334,14 @@ define void @v_shuffle_v3f32_v4f32__7_u_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v4f32__7_u_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v8, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2486,17 +2388,14 @@ define void @v_shuffle_v3f32_v4f32__7_0_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v4f32__7_0_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: v_mov_b32_e32 v4, v1
-; GFX900-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v8, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2546,16 +2445,14 @@ define void @v_shuffle_v3f32_v4f32__7_2_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v6
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v6
-; GFX900-NEXT: v_mov_b32_e32 v4, v2
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx3 v7, v[3:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v8, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2604,15 +2501,14 @@ define void @v_shuffle_v3f32_v4f32__7_3_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v7
-; GFX900-NEXT: v_mov_b32_e32 v4, v1
-; GFX900-NEXT: global_store_dwordx3 v8, v[2:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v7
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx3 v8, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2659,16 +2555,14 @@ define void @v_shuffle_v3f32_v4f32__7_4_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v2
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx3 v8, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2717,15 +2611,14 @@ define void @v_shuffle_v3f32_v4f32__7_5_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
; GFX900-NEXT: v_mov_b32_e32 v4, v1
-; GFX900-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v8, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2773,15 +2666,14 @@ define void @v_shuffle_v3f32_v4f32__7_6_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx3 v8, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2961,10 +2853,10 @@ define void @v_shuffle_v3f32_v4f32__2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2974,10 +2866,10 @@ define void @v_shuffle_v3f32_v4f32__2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2987,10 +2879,10 @@ define void @v_shuffle_v3f32_v4f32__2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -3003,13 +2895,13 @@ define void @v_shuffle_v3f32_v4f32__3_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v4f32__3_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3146,15 +3038,14 @@ define void @v_shuffle_v3f32_v4f32__6_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx3 v7, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
+; GFX900-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3162,15 +3053,14 @@ define void @v_shuffle_v3f32_v4f32__6_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3178,15 +3068,15 @@ define void @v_shuffle_v3f32_v4f32__6_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -3201,15 +3091,14 @@ define void @v_shuffle_v3f32_v4f32__7_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx3 v7, v[1:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3255,15 +3144,14 @@ define void @v_shuffle_v3f32_v4f32__7_u_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v4f32__7_u_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
-; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: global_store_dwordx3 v7, v[0:2], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx3 v7, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3309,16 +3197,14 @@ define void @v_shuffle_v3f32_v4f32__7_0_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v5, v6
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v6
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v2
-; GFX900-NEXT: global_store_dwordx3 v7, v[3:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v8, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3366,15 +3252,14 @@ define void @v_shuffle_v3f32_v4f32__7_1_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v4f32__7_1_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
-; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: global_store_dwordx3 v7, v[0:2], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx3 v7, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3420,15 +3305,14 @@ define void @v_shuffle_v3f32_v4f32__7_3_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v6
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v7, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3475,15 +3359,14 @@ define void @v_shuffle_v3f32_v4f32__7_4_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx3 v7, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx3 v7, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3531,14 +3414,13 @@ define void @v_shuffle_v3f32_v4f32__7_5_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
; GFX900-NEXT: global_store_dwordx3 v7, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
@@ -3586,15 +3468,14 @@ define void @v_shuffle_v3f32_v4f32__7_6_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v6, v2
-; GFX900-NEXT: global_store_dwordx3 v7, v[4:6], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx3 v7, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3641,38 +3522,37 @@ define void @v_shuffle_v3f32_v4f32__u_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v4f32__u_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3f32_v4f32__u_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v4f32__u_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -3745,11 +3625,10 @@ define void @v_shuffle_v3f32_v4f32__1_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3759,11 +3638,10 @@ define void @v_shuffle_v3f32_v4f32__1_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -3776,41 +3654,37 @@ define void @v_shuffle_v3f32_v4f32__2_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v4f32__2_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3f32_v4f32__2_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v4f32__2_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -3823,13 +3697,13 @@ define void @v_shuffle_v3f32_v4f32__3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v4f32__3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3839,11 +3713,10 @@ define void @v_shuffle_v3f32_v4f32__3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v3
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3853,11 +3726,10 @@ define void @v_shuffle_v3f32_v4f32__3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v3
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -3870,38 +3742,37 @@ define void @v_shuffle_v3f32_v4f32__4_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v4f32__4_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3f32_v4f32__4_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v4f32__4_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -3933,14 +3804,13 @@ define void @v_shuffle_v3f32_v4f32__5_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v5
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v5
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: global_store_dwordx3 v8, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3950,14 +3820,13 @@ define void @v_shuffle_v3f32_v4f32__5_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, v5
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v5
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: global_store_dwordx3 v8, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -3972,15 +3841,14 @@ define void @v_shuffle_v3f32_v4f32__6_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx3 v8, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3988,16 +3856,15 @@ define void @v_shuffle_v3f32_v4f32__6_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v5
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4005,16 +3872,15 @@ define void @v_shuffle_v3f32_v4f32__6_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
+; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -4028,16 +3894,15 @@ define void @v_shuffle_v3f32_v4f32__7_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v4f32__7_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx3 v8, v[1:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4047,14 +3912,13 @@ define void @v_shuffle_v3f32_v4f32__7_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v7
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: global_store_dwordx3 v8, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4064,14 +3928,13 @@ define void @v_shuffle_v3f32_v4f32__7_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v7
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: global_store_dwordx3 v8, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -4085,15 +3948,14 @@ define void @v_shuffle_v3f32_v4f32__7_u_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v4f32__7_u_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx3 v8, v[1:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4140,15 +4002,14 @@ define void @v_shuffle_v3f32_v4f32__7_0_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v5, v7
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: global_store_dwordx3 v8, v[1:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v8, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4197,15 +4058,14 @@ define void @v_shuffle_v3f32_v4f32__7_1_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v5, v6
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v7, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4251,15 +4111,14 @@ define void @v_shuffle_v3f32_v4f32__7_2_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v4f32__7_2_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx3 v8, v[1:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4308,15 +4167,14 @@ define void @v_shuffle_v3f32_v4f32__7_4_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: global_store_dwordx3 v8, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4365,15 +4223,14 @@ define void @v_shuffle_v3f32_v4f32__7_5_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v7
-; GFX900-NEXT: v_mov_b32_e32 v6, v3
-; GFX900-NEXT: global_store_dwordx3 v8, v[4:6], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4420,15 +4277,14 @@ define void @v_shuffle_v3f32_v4f32__7_6_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: v_mov_b32_e32 v7, v3
-; GFX900-NEXT: global_store_dwordx3 v8, v[5:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4567,36 +4423,33 @@ define void @v_shuffle_v3f32_v4f32__2_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v4f32__2_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v4, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3f32_v4f32__2_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v4, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v4f32__2_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: global_store_dwordx3 v4, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -4609,12 +4462,11 @@ define void @v_shuffle_v3f32_v4f32__3_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v4f32__3_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v4, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4675,29 +4527,27 @@ define void @v_shuffle_v3f32_v4f32__5_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-LABEL: v_shuffle_v3f32_v4f32__5_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v1
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: global_store_dwordx3 v4, v[6:8], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v3
+; GFX90A-NEXT: v_mov_b32_e32 v1, v2
+; GFX90A-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v4f32__5_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v1
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: global_store_dwordx3 v4, v[6:8], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v3
+; GFX942-NEXT: v_mov_b32_e32 v1, v2
+; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -4714,40 +4564,36 @@ define void @v_shuffle_v3f32_v4f32__6_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3f32_v4f32__6_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: global_store_dwordx3 v4, v[6:8], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v4f32__6_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: global_store_dwordx3 v4, v[6:8], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -4761,43 +4607,40 @@ define void @v_shuffle_v3f32_v4f32__7_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v4f32__7_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3f32_v4f32__7_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v3
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: global_store_dwordx3 v4, v[6:8], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: v_mov_b32_e32 v1, v2
+; GFX90A-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v3
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: global_store_dwordx3 v4, v[6:8], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: v_mov_b32_e32 v1, v2
+; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -4811,40 +4654,37 @@ define void @v_shuffle_v3f32_v4f32__7_u_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v4f32__7_u_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx3 v4, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3f32_v4f32__7_u_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v1, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v3
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: global_store_dwordx3 v4, v[6:8], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: global_store_dwordx3 v1, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_u_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v1, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v3
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: global_store_dwordx3 v4, v[6:8], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: global_store_dwordx3 v1, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -4859,50 +4699,47 @@ define void @v_shuffle_v3f32_v4f32__7_0_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: v_mov_b32_e32 v4, v1
-; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx3 v8, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3f32_v4f32__7_0_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[6:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, v5
-; GFX90A-NEXT: v_mov_b32_e32 v9, v0
-; GFX90A-NEXT: v_mov_b32_e32 v10, v2
-; GFX90A-NEXT: global_store_dwordx3 v6, v[8:10], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: v_mov_b32_e32 v1, v6
+; GFX90A-NEXT: global_store_dwordx3 v10, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_0_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[6:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v9, v0
-; GFX942-NEXT: v_mov_b32_e32 v8, v5
-; GFX942-NEXT: v_mov_b32_e32 v10, v2
-; GFX942-NEXT: global_store_dwordx3 v6, v[8:10], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: v_mov_b32_e32 v1, v6
+; GFX942-NEXT: global_store_dwordx3 v10, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -4917,14 +4754,14 @@ define void @v_shuffle_v3f32_v4f32__7_1_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx3 v7, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4970,14 +4807,14 @@ define void @v_shuffle_v3f32_v4f32__7_2_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: global_store_dwordx3 v7, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4985,15 +4822,14 @@ define void @v_shuffle_v3f32_v4f32__7_2_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: v_mov_b32_e32 v1, v6
; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
@@ -5002,15 +4838,14 @@ define void @v_shuffle_v3f32_v4f32__7_2_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
+; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v1, v6
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -5026,16 +4861,14 @@ define void @v_shuffle_v3f32_v4f32__7_3_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5049,10 +4882,8 @@ define void @v_shuffle_v3f32_v4f32__7_3_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v7
+; GFX90A-NEXT: global_store_dwordx3 v8, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5066,10 +4897,9 @@ define void @v_shuffle_v3f32_v4f32__7_3_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v7
+; GFX942-NEXT: global_store_dwordx3 v8, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -5083,43 +4913,40 @@ define void @v_shuffle_v3f32_v4f32__7_5_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v4f32__7_5_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3f32_v4f32__7_5_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v3
-; GFX90A-NEXT: v_mov_b32_e32 v7, v1
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: global_store_dwordx3 v4, v[6:8], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: v_mov_b32_e32 v1, v3
+; GFX90A-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_5_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v3
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: global_store_dwordx3 v4, v[6:8], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -5133,42 +4960,40 @@ define void @v_shuffle_v3f32_v4f32__7_6_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v4f32__7_6_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3f32_v4f32__7_6_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v3
-; GFX90A-NEXT: v_mov_b32_e32 v7, v2
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: global_store_dwordx3 v4, v[6:8], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: v_mov_b32_e32 v1, v4
+; GFX90A-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v4f32__7_6_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v3
-; GFX942-NEXT: v_mov_b32_e32 v7, v2
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: global_store_dwordx3 v4, v[6:8], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: v_mov_b32_e32 v1, v4
+; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -5342,9 +5167,8 @@ define void @v_shuffle_v3f32_v4f32__2_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: v_mov_b32_e32 v5, v4
-; GFX900-NEXT: global_store_dwordx3 v7, v[3:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
+; GFX900-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5352,15 +5176,15 @@ define void @v_shuffle_v3f32_v4f32__2_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v6, v5
-; GFX90A-NEXT: global_store_dwordx3 v8, v[4:6], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v5
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: global_store_dwordx3 v8, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5368,15 +5192,15 @@ define void @v_shuffle_v3f32_v4f32__2_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
+; GFX942-NEXT: global_store_dwordx3 v8, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -5394,12 +5218,11 @@ define void @v_shuffle_v3f32_v4f32__3_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v3
-; GFX900-NEXT: v_mov_b32_e32 v6, v5
-; GFX900-NEXT: global_store_dwordx3 v8, v[4:6], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v8, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5537,10 +5360,10 @@ define void @v_shuffle_v3f32_v4f32__6_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5550,10 +5373,10 @@ define void @v_shuffle_v3f32_v4f32__6_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5563,10 +5386,10 @@ define void @v_shuffle_v3f32_v4f32__6_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -5580,13 +5403,13 @@ define void @v_shuffle_v3f32_v4f32__7_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v4f32__7_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5626,13 +5449,12 @@ define void @v_shuffle_v3f32_v4f32__7_u_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v4f32__7_u_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx3 v4, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5673,16 +5495,14 @@ define void @v_shuffle_v3f32_v4f32__7_0_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v2
-; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx3 v8, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5731,15 +5551,14 @@ define void @v_shuffle_v3f32_v4f32__7_1_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx3 v7, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5787,15 +5606,14 @@ define void @v_shuffle_v3f32_v4f32__7_2_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx3 v7, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5844,16 +5662,14 @@ define void @v_shuffle_v3f32_v4f32__7_3_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
-; GFX900-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5863,14 +5679,13 @@ define void @v_shuffle_v3f32_v4f32__7_3_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v7
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: global_store_dwordx3 v8, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5880,14 +5695,13 @@ define void @v_shuffle_v3f32_v4f32__7_3_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v7
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
+; GFX942-NEXT: global_store_dwordx3 v8, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -5901,14 +5715,13 @@ define void @v_shuffle_v3f32_v4f32__7_4_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v4f32__7_4_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: v_mov_b32_e32 v4, v1
-; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6162,9 +5975,9 @@ define void @v_shuffle_v3f32_v4f32__2_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v2
-; GFX900-NEXT: v_mov_b32_e32 v6, v5
-; GFX900-NEXT: global_store_dwordx3 v7, v[4:6], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v5
+; GFX900-NEXT: v_mov_b32_e32 v4, v5
+; GFX900-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6172,15 +5985,15 @@ define void @v_shuffle_v3f32_v4f32__2_6_6(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v6
-; GFX90A-NEXT: global_store_dwordx3 v8, v[4:6], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v6
+; GFX90A-NEXT: v_mov_b32_e32 v4, v6
+; GFX90A-NEXT: global_store_dwordx3 v8, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6188,15 +6001,15 @@ define void @v_shuffle_v3f32_v4f32__2_6_6(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, v6
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v6
+; GFX942-NEXT: v_mov_b32_e32 v4, v6
+; GFX942-NEXT: global_store_dwordx3 v8, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -6214,12 +6027,12 @@ define void @v_shuffle_v3f32_v4f32__3_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v6
+; GFX900-NEXT: v_mov_b32_e32 v5, v6
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: v_mov_b32_e32 v7, v6
-; GFX900-NEXT: global_store_dwordx3 v8, v[5:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v8, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6356,10 +6169,10 @@ define void @v_shuffle_v3f32_v4f32__6_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6369,10 +6182,10 @@ define void @v_shuffle_v3f32_v4f32__6_6_6(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6382,10 +6195,10 @@ define void @v_shuffle_v3f32_v4f32__6_6_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -6399,13 +6212,13 @@ define void @v_shuffle_v3f32_v4f32__7_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v4f32__7_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6445,12 +6258,12 @@ define void @v_shuffle_v3f32_v4f32__7_u_6(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v4f32__7_u_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx3 v4, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6489,15 +6302,14 @@ define void @v_shuffle_v3f32_v4f32__7_0_6(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: global_store_dwordx3 v5, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx3 v8, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6545,15 +6357,14 @@ define void @v_shuffle_v3f32_v4f32__7_1_6(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx3 v7, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6601,15 +6412,14 @@ define void @v_shuffle_v3f32_v4f32__7_2_6(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx3 v7, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6656,15 +6466,14 @@ define void @v_shuffle_v3f32_v4f32__7_3_6(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v7
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: global_store_dwordx3 v8, v[4:6], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6672,15 +6481,15 @@ define void @v_shuffle_v3f32_v4f32__7_3_6(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v7
-; GFX90A-NEXT: v_mov_b32_e32 v5, v3
-; GFX90A-NEXT: global_store_dwordx3 v8, v[4:6], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v7
+; GFX90A-NEXT: v_mov_b32_e32 v4, v6
+; GFX90A-NEXT: global_store_dwordx3 v8, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6688,15 +6497,15 @@ define void @v_shuffle_v3f32_v4f32__7_3_6(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, v7
-; GFX942-NEXT: v_mov_b32_e32 v5, v3
-; GFX942-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v7
+; GFX942-NEXT: v_mov_b32_e32 v4, v6
+; GFX942-NEXT: global_store_dwordx3 v8, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -6759,12 +6568,13 @@ define void @v_shuffle_v3f32_v4f32__7_5_6(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v4f32__7_5_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6802,38 +6612,37 @@ define void @v_shuffle_v3f32_v4f32__u_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v4f32__u_7_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3f32_v4f32__u_7_7:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v4f32__u_7_7:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -6920,16 +6729,15 @@ define void @v_shuffle_v3f32_v4f32__1_7_7(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: global_store_dwordx3 v8, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6937,16 +6745,15 @@ define void @v_shuffle_v3f32_v4f32__1_7_7(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: global_store_dwordx3 v8, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -6967,9 +6774,9 @@ define void @v_shuffle_v3f32_v4f32__2_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v2
-; GFX900-NEXT: v_mov_b32_e32 v5, v6
-; GFX900-NEXT: global_store_dwordx3 v7, v[4:6], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v6
+; GFX900-NEXT: v_mov_b32_e32 v4, v6
+; GFX900-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6979,14 +6786,13 @@ define void @v_shuffle_v3f32_v4f32__2_7_7(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: v_mov_b32_e32 v2, v7
-; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v7
+; GFX90A-NEXT: v_mov_b32_e32 v4, v7
+; GFX90A-NEXT: global_store_dwordx3 v8, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6996,14 +6802,13 @@ define void @v_shuffle_v3f32_v4f32__2_7_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: v_mov_b32_e32 v2, v7
-; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v7
+; GFX942-NEXT: v_mov_b32_e32 v4, v7
+; GFX942-NEXT: global_store_dwordx3 v8, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -7021,12 +6826,12 @@ define void @v_shuffle_v3f32_v4f32__3_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v7
+; GFX900-NEXT: v_mov_b32_e32 v5, v7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: v_mov_b32_e32 v6, v7
-; GFX900-NEXT: global_store_dwordx3 v8, v[5:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v8, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7034,16 +6839,15 @@ define void @v_shuffle_v3f32_v4f32__3_7_7(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v3
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: v_mov_b32_e32 v2, v7
-; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v5
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7051,16 +6855,16 @@ define void @v_shuffle_v3f32_v4f32__3_7_7(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v0, v3
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: v_mov_b32_e32 v2, v7
-; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v5
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -7135,11 +6939,10 @@ define void @v_shuffle_v3f32_v4f32__5_7_7(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7149,11 +6952,10 @@ define void @v_shuffle_v3f32_v4f32__5_7_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -7167,41 +6969,37 @@ define void @v_shuffle_v3f32_v4f32__6_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v4f32__6_7_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3f32_v4f32__6_7_7:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3f32_v4f32__6_7_7:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -7215,12 +7013,12 @@ define void @v_shuffle_v3f32_v4f32__7_u_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v4f32__7_u_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx3 v4, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7261,15 +7059,14 @@ define void @v_shuffle_v3f32_v4f32__7_0_7(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx3 v8, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7319,15 +7116,14 @@ define void @v_shuffle_v3f32_v4f32__7_1_7(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
-; GFX900-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx3 v7, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7375,15 +7171,14 @@ define void @v_shuffle_v3f32_v4f32__7_2_7(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v6
-; GFX900-NEXT: global_store_dwordx3 v7, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7432,15 +7227,14 @@ define void @v_shuffle_v3f32_v4f32__7_3_7(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: v_mov_b32_e32 v6, v3
-; GFX900-NEXT: global_store_dwordx3 v8, v[5:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7450,14 +7244,13 @@ define void @v_shuffle_v3f32_v4f32__7_3_7(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v7
-; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v7
+; GFX90A-NEXT: global_store_dwordx3 v8, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7467,14 +7260,13 @@ define void @v_shuffle_v3f32_v4f32__7_3_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v7
-; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v7
+; GFX942-NEXT: global_store_dwordx3 v8, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -7488,13 +7280,13 @@ define void @v_shuffle_v3f32_v4f32__7_4_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v4f32__7_4_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7537,13 +7329,13 @@ define void @v_shuffle_v3f32_v4f32__7_5_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v4f32__7_5_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7583,12 +7375,13 @@ define void @v_shuffle_v3f32_v4f32__7_6_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3f32_v4f32__7_6_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3i32.v2i32.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3i32.v2i32.ll
index f36f23a3a932d..6a0527f7cca24 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3i32.v2i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3i32.v2i32.ll
@@ -776,15 +776,14 @@ define void @v_shuffle_v3i32_v2i32__3_u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v2i32__3_u_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v2, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[3:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v2, v[1:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3i32.v3i32.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3i32.v3i32.ll
index eacf77c931a68..65ceea2299e10 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3i32.v3i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3i32.v3i32.ll
@@ -99,36 +99,33 @@ define void @v_shuffle_v3i32_v3i32__2_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v3i32__2_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v3, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i32_v3i32__2_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v3, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v3i32__2_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: global_store_dwordx3 v3, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -194,36 +191,33 @@ define void @v_shuffle_v3i32_v3i32__5_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v3i32__5_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v3, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i32_v3i32__5_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v3, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: global_store_dwordx3 v3, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -237,48 +231,45 @@ define void @v_shuffle_v3i32_v3i32__5_0_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v3i32__5_0_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: global_store_dwordx3 v5, v[0:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i32_v3i32__5_0_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v0
-; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_0_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v0
-; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: global_store_dwordx3 v7, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -292,46 +283,43 @@ define void @v_shuffle_v3i32_v3i32__5_1_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v3i32__5_1_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
-; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: global_store_dwordx3 v5, v[0:2], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i32_v3i32__5_1_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
-; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: global_store_dwordx3 v5, v[0:2], s[16:17]
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_1_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
-; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -345,15 +333,14 @@ define void @v_shuffle_v3i32_v3i32__5_2_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v3i32__5_2_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[1:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx3 v6, v[1:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v4, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -361,15 +348,14 @@ define void @v_shuffle_v3i32_v3i32__5_2_u(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -377,15 +363,14 @@ define void @v_shuffle_v3i32_v3i32__5_2_u(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -401,37 +386,35 @@ define void @v_shuffle_v3i32_v3i32__5_3_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: global_store_dwordx3 v4, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i32_v3i32__5_3_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: global_store_dwordx3 v3, v[4:6], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: global_store_dwordx3 v4, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_3_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: global_store_dwordx3 v4, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -445,36 +428,37 @@ define void @v_shuffle_v3i32_v3i32__5_4_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v3i32__5_4_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: global_store_dwordx3 v4, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i32_v3i32__5_4_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: global_store_dwordx3 v4, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_4_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: global_store_dwordx3 v4, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -488,38 +472,37 @@ define void @v_shuffle_v3i32_v3i32__5_5_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v3i32__5_5_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: global_store_dwordx3 v3, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: global_store_dwordx3 v4, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i32_v3i32__5_5_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx3 v4, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_5_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx3 v4, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -533,50 +516,46 @@ define void @v_shuffle_v3i32_v3i32__5_5_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v3i32__5_5_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx3 v6, v[1:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i32_v3i32__5_5_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_5_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx3 v7, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -591,14 +570,14 @@ define void @v_shuffle_v3i32_v3i32__5_5_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: global_store_dwordx3 v5, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -606,16 +585,15 @@ define void @v_shuffle_v3i32_v3i32__5_5_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: v_mov_b32_e32 v4, v1
-; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -623,17 +601,15 @@ define void @v_shuffle_v3i32_v3i32__5_5_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: v_mov_b32_e32 v4, v1
-; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx3 v7, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -648,14 +624,14 @@ define void @v_shuffle_v3i32_v3i32__5_5_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -663,15 +639,14 @@ define void @v_shuffle_v3i32_v3i32__5_5_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v6
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -679,16 +654,15 @@ define void @v_shuffle_v3i32_v3i32__5_5_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v6
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -702,42 +676,40 @@ define void @v_shuffle_v3i32_v3i32__5_5_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v3i32__5_5_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i32_v3i32__5_5_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v2
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: global_store_dwordx3 v3, v[4:6], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_5_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v2
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -751,42 +723,40 @@ define void @v_shuffle_v3i32_v3i32__5_5_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v3i32__5_5_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i32_v3i32__5_5_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v2
-; GFX90A-NEXT: v_mov_b32_e32 v6, v1
-; GFX90A-NEXT: global_store_dwordx3 v3, v[4:6], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_5_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v2
-; GFX942-NEXT: v_mov_b32_e32 v6, v1
-; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -800,39 +770,40 @@ define void @v_shuffle_v3i32_v3i32__5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v3i32__5_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i32_v3i32__5_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -858,26 +829,25 @@ define void @v_shuffle_v3i32_v3i32__u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-LABEL: v_shuffle_v3i32_v3i32__u_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v1, v2
+; GFX90A-NEXT: global_store_dwordx3 v0, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v3i32__u_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v1, v2
+; GFX942-NEXT: global_store_dwordx3 v0, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -948,29 +918,27 @@ define void @v_shuffle_v3i32_v3i32__1_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-LABEL: v_shuffle_v3i32_v3i32__1_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v1
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: global_store_dwordx3 v3, v[4:6], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v3
+; GFX90A-NEXT: v_mov_b32_e32 v1, v2
+; GFX90A-NEXT: global_store_dwordx3 v5, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v3i32__1_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v1
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v3
+; GFX942-NEXT: v_mov_b32_e32 v1, v2
+; GFX942-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -983,42 +951,40 @@ define void @v_shuffle_v3i32_v3i32__2_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v3i32__2_0_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i32_v3i32__2_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: global_store_dwordx3 v3, v[4:6], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v3i32__2_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -1043,26 +1009,25 @@ define void @v_shuffle_v3i32_v3i32__3_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-LABEL: v_shuffle_v3i32_v3i32__3_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v1, v2
+; GFX90A-NEXT: global_store_dwordx3 v0, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v3i32__3_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v1, v2
+; GFX942-NEXT: global_store_dwordx3 v0, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -1091,16 +1056,15 @@ define void @v_shuffle_v3i32_v3i32__4_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: v_mov_b32_e32 v3, v0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: v_mov_b32_e32 v1, v2
+; GFX90A-NEXT: global_store_dwordx3 v7, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1108,17 +1072,15 @@ define void @v_shuffle_v3i32_v3i32__4_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: v_mov_b32_e32 v3, v0
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v1, v2
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: global_store_dwordx3 v7, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -1133,50 +1095,45 @@ define void @v_shuffle_v3i32_v3i32__5_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: global_store_dwordx3 v5, v[0:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i32_v3i32__5_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v0
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: global_store_dwordx3 v7, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -1190,49 +1147,42 @@ define void @v_shuffle_v3i32_v3i32__5_u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v3i32__5_u_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v3, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i32_v3i32__5_u_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v3, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_u_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
+; GFX942-NEXT: global_store_dwordx3 v3, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -1246,52 +1196,46 @@ define void @v_shuffle_v3i32_v3i32__5_1_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v3i32__5_1_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v3, v5
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i32_v3i32__5_1_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v3, v5
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_1_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: global_store_dwordx3 v7, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -1305,50 +1249,46 @@ define void @v_shuffle_v3i32_v3i32__5_2_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v3i32__5_2_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v3, v6
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx3 v6, v[1:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i32_v3i32__5_2_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v6
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v2
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: global_store_dwordx3 v3, v[4:6], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_2_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v2
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v6
+; GFX942-NEXT: global_store_dwordx3 v7, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -1362,50 +1302,46 @@ define void @v_shuffle_v3i32_v3i32__5_3_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v3i32__5_3_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: global_store_dwordx3 v7, v[0:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i32_v3i32__5_3_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v2
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: global_store_dwordx3 v5, v[6:8], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_3_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v2
-; GFX942-NEXT: global_store_dwordx3 v5, v[6:8], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: global_store_dwordx3 v7, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -1419,49 +1355,46 @@ define void @v_shuffle_v3i32_v3i32__5_4_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v3i32__5_4_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i32_v3i32__5_4_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v3, v1
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_4_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: global_store_dwordx3 v7, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -1604,39 +1537,40 @@ define void @v_shuffle_v3i32_v3i32__2_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v3i32__2_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i32_v3i32__2_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v3i32__2_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -1747,15 +1681,14 @@ define void @v_shuffle_v3i32_v3i32__5_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: global_store_dwordx3 v5, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1763,15 +1696,14 @@ define void @v_shuffle_v3i32_v3i32__5_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: global_store_dwordx3 v5, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1779,16 +1711,14 @@ define void @v_shuffle_v3i32_v3i32__5_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1]
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -1802,15 +1732,14 @@ define void @v_shuffle_v3i32_v3i32__5_u_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v3i32__5_u_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
-; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1818,15 +1747,14 @@ define void @v_shuffle_v3i32_v3i32__5_u_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: global_store_dwordx3 v5, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1834,16 +1762,14 @@ define void @v_shuffle_v3i32_v3i32__5_u_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1]
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -1857,15 +1783,14 @@ define void @v_shuffle_v3i32_v3i32__5_0_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v3i32__5_0_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
-; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1873,16 +1798,15 @@ define void @v_shuffle_v3i32_v3i32__5_0_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v1
-; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1890,17 +1814,15 @@ define void @v_shuffle_v3i32_v3i32__5_0_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v0
-; GFX942-NEXT: v_mov_b32_e32 v4, v1
-; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
+; GFX942-NEXT: global_store_dwordx3 v7, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -1915,15 +1837,14 @@ define void @v_shuffle_v3i32_v3i32__5_2_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v5
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:6]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx3 v7, v[0:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1933,14 +1854,13 @@ define void @v_shuffle_v3i32_v3i32__5_2_1(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v6
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v2
-; GFX90A-NEXT: v_mov_b32_e32 v6, v1
-; GFX90A-NEXT: global_store_dwordx3 v3, v[4:6], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1950,14 +1870,13 @@ define void @v_shuffle_v3i32_v3i32__5_2_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v6
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v2
-; GFX942-NEXT: v_mov_b32_e32 v6, v1
-; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1]
+; GFX942-NEXT: global_store_dwordx3 v7, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -1974,13 +1893,12 @@ define void @v_shuffle_v3i32_v3i32__5_3_1(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1988,16 +1906,15 @@ define void @v_shuffle_v3i32_v3i32__5_3_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v2
-; GFX90A-NEXT: v_mov_b32_e32 v8, v1
-; GFX90A-NEXT: global_store_dwordx3 v5, v[6:8], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2005,16 +1922,15 @@ define void @v_shuffle_v3i32_v3i32__5_3_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, v1
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v2
-; GFX942-NEXT: global_store_dwordx3 v5, v[6:8], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: global_store_dwordx3 v7, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -2029,15 +1945,14 @@ define void @v_shuffle_v3i32_v3i32__5_4_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v4, v1
-; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2045,15 +1960,15 @@ define void @v_shuffle_v3i32_v3i32__5_4_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v4, v1
-; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2061,16 +1976,15 @@ define void @v_shuffle_v3i32_v3i32__5_4_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v4, v1
-; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
+; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: global_store_dwordx3 v7, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -2212,39 +2126,40 @@ define void @v_shuffle_v3i32_v3i32__2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v3i32__2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i32_v3i32__2_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v3i32__2_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -2353,16 +2268,15 @@ define void @v_shuffle_v3i32_v3i32__5_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v3i32__5_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[1:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx3 v6, v[1:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2370,15 +2284,14 @@ define void @v_shuffle_v3i32_v3i32__5_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2386,15 +2299,14 @@ define void @v_shuffle_v3i32_v3i32__5_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -2408,46 +2320,43 @@ define void @v_shuffle_v3i32_v3i32__5_u_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v3i32__5_u_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i32_v3i32__5_u_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_u_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -2462,15 +2371,14 @@ define void @v_shuffle_v3i32_v3i32__5_0_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v5
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:6]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx3 v7, v[0:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2480,14 +2388,13 @@ define void @v_shuffle_v3i32_v3i32__5_0_2(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_mov_b32_e32 v4, v6
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: global_store_dwordx3 v3, v[4:6], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2497,14 +2404,13 @@ define void @v_shuffle_v3i32_v3i32__5_0_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_mov_b32_e32 v4, v6
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1]
+; GFX942-NEXT: global_store_dwordx3 v7, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -2518,46 +2424,43 @@ define void @v_shuffle_v3i32_v3i32__5_1_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v3i32__5_1_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i32_v3i32__5_1_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_1_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -2574,13 +2477,12 @@ define void @v_shuffle_v3i32_v3i32__5_3_2(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: global_store_dwordx3 v5, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2588,15 +2490,14 @@ define void @v_shuffle_v3i32_v3i32__5_3_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v4
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2604,16 +2505,15 @@ define void @v_shuffle_v3i32_v3i32__5_3_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v4
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -2628,15 +2528,14 @@ define void @v_shuffle_v3i32_v3i32__5_4_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v5, v2
-; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2644,15 +2543,14 @@ define void @v_shuffle_v3i32_v3i32__5_4_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: global_store_dwordx3 v3, v[4:6], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2660,15 +2558,15 @@ define void @v_shuffle_v3i32_v3i32__5_4_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -2773,36 +2671,33 @@ define void @v_shuffle_v3i32_v3i32__2_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v3i32__2_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v3, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i32_v3i32__2_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v3, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v3i32__2_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: global_store_dwordx3 v3, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -2839,29 +2734,27 @@ define void @v_shuffle_v3i32_v3i32__4_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-LABEL: v_shuffle_v3i32_v3i32__4_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v1
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: global_store_dwordx3 v3, v[4:6], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v3
+; GFX90A-NEXT: v_mov_b32_e32 v1, v2
+; GFX90A-NEXT: global_store_dwordx3 v5, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v3i32__4_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v1
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v3
+; GFX942-NEXT: v_mov_b32_e32 v1, v2
+; GFX942-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -2875,42 +2768,40 @@ define void @v_shuffle_v3i32_v3i32__5_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v3i32__5_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i32_v3i32__5_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: global_store_dwordx3 v3, v[4:6], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -2924,13 +2815,12 @@ define void @v_shuffle_v3i32_v3i32__5_u_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v3i32__5_u_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx3 v3, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2970,14 +2860,14 @@ define void @v_shuffle_v3i32_v3i32__5_0_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: global_store_dwordx3 v5, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2985,16 +2875,15 @@ define void @v_shuffle_v3i32_v3i32__5_0_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v2
-; GFX90A-NEXT: global_store_dwordx3 v5, v[6:8], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3002,16 +2891,15 @@ define void @v_shuffle_v3i32_v3i32__5_0_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v8, v2
-; GFX942-NEXT: global_store_dwordx3 v5, v[6:8], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: global_store_dwordx3 v7, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -3026,14 +2914,14 @@ define void @v_shuffle_v3i32_v3i32__5_1_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: global_store_dwordx3 v5, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3041,14 +2929,14 @@ define void @v_shuffle_v3i32_v3i32__5_1_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: global_store_dwordx3 v5, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3056,15 +2944,15 @@ define void @v_shuffle_v3i32_v3i32__5_1_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -3079,14 +2967,14 @@ define void @v_shuffle_v3i32_v3i32__5_2_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[1:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx3 v6, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3094,16 +2982,15 @@ define void @v_shuffle_v3i32_v3i32__5_2_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3111,16 +2998,15 @@ define void @v_shuffle_v3i32_v3i32__5_2_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -3147,29 +3033,27 @@ define void @v_shuffle_v3i32_v3i32__5_4_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-LABEL: v_shuffle_v3i32_v3i32__5_4_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: global_store_dwordx3 v3, v[4:6], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_4_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -3340,12 +3224,11 @@ define void @v_shuffle_v3i32_v3i32__2_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: v_mov_b32_e32 v5, v4
-; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3355,13 +3238,13 @@ define void @v_shuffle_v3i32_v3i32__2_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v5
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v6, v5
-; GFX90A-NEXT: global_store_dwordx3 v3, v[4:6], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3371,13 +3254,13 @@ define void @v_shuffle_v3i32_v3i32__2_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1]
+; GFX942-NEXT: global_store_dwordx3 v7, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -3480,39 +3363,40 @@ define void @v_shuffle_v3i32_v3i32__5_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v3i32__5_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i32_v3i32__5_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -3526,38 +3410,37 @@ define void @v_shuffle_v3i32_v3i32__5_u_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v3i32__5_u_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx3 v3, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i32_v3i32__5_u_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: global_store_dwordx3 v3, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_u_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: global_store_dwordx3 v3, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -3572,15 +3455,14 @@ define void @v_shuffle_v3i32_v3i32__5_0_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx3 v5, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3588,16 +3470,15 @@ define void @v_shuffle_v3i32_v3i32__5_0_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v3
-; GFX90A-NEXT: global_store_dwordx3 v5, v[6:8], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3605,16 +3486,15 @@ define void @v_shuffle_v3i32_v3i32__5_0_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v8, v3
-; GFX942-NEXT: global_store_dwordx3 v5, v[6:8], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: global_store_dwordx3 v7, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -3629,15 +3509,14 @@ define void @v_shuffle_v3i32_v3i32__5_1_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx3 v5, v[0:2], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3645,15 +3524,14 @@ define void @v_shuffle_v3i32_v3i32__5_1_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx3 v5, v[0:2], s[16:17]
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3661,16 +3539,15 @@ define void @v_shuffle_v3i32_v3i32__5_1_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -3685,15 +3562,14 @@ define void @v_shuffle_v3i32_v3i32__5_2_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[1:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx3 v6, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3701,16 +3577,15 @@ define void @v_shuffle_v3i32_v3i32__5_2_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3718,16 +3593,15 @@ define void @v_shuffle_v3i32_v3i32__5_2_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -3741,41 +3615,40 @@ define void @v_shuffle_v3i32_v3i32__5_3_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v3i32__5_3_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i32_v3i32__5_3_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v1
-; GFX90A-NEXT: global_store_dwordx3 v3, v[4:6], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_3_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v1
-; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -3945,15 +3818,15 @@ define void @v_shuffle_v3i32_v3i32__2_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v5
+; GFX900-NEXT: v_mov_b32_e32 v4, v5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3963,13 +3836,13 @@ define void @v_shuffle_v3i32_v3i32__2_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v6
+; GFX90A-NEXT: v_mov_b32_e32 v4, v6
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v6
-; GFX90A-NEXT: global_store_dwordx3 v3, v[4:6], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3979,13 +3852,13 @@ define void @v_shuffle_v3i32_v3i32__2_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v6
+; GFX942-NEXT: v_mov_b32_e32 v4, v6
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, v6
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1]
+; GFX942-NEXT: global_store_dwordx3 v7, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -4087,36 +3960,37 @@ define void @v_shuffle_v3i32_v3i32__5_u_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v3i32__5_u_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx3 v3, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i32_v3i32__5_u_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: global_store_dwordx3 v3, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_u_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: global_store_dwordx3 v3, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -4131,15 +4005,14 @@ define void @v_shuffle_v3i32_v3i32__5_0_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: global_store_dwordx3 v5, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4147,15 +4020,15 @@ define void @v_shuffle_v3i32_v3i32__5_0_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v0
-; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4163,16 +4036,15 @@ define void @v_shuffle_v3i32_v3i32__5_0_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v0
-; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: global_store_dwordx3 v7, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -4187,15 +4059,14 @@ define void @v_shuffle_v3i32_v3i32__5_1_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: global_store_dwordx3 v5, v[0:2], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4203,15 +4074,14 @@ define void @v_shuffle_v3i32_v3i32__5_1_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: global_store_dwordx3 v5, v[0:2], s[16:17]
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4219,16 +4089,15 @@ define void @v_shuffle_v3i32_v3i32__5_1_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -4243,15 +4112,14 @@ define void @v_shuffle_v3i32_v3i32__5_2_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[1:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx3 v6, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4259,15 +4127,15 @@ define void @v_shuffle_v3i32_v3i32__5_2_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v2
-; GFX90A-NEXT: global_store_dwordx3 v3, v[4:6], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4275,15 +4143,15 @@ define void @v_shuffle_v3i32_v3i32__5_2_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v2
-; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -4297,42 +4165,40 @@ define void @v_shuffle_v3i32_v3i32__5_3_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v3i32__5_3_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i32_v3i32__5_3_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: global_store_dwordx3 v3, v[4:6], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_3_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1]
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -4346,36 +4212,40 @@ define void @v_shuffle_v3i32_v3i32__5_4_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v3i32__5_4_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i32_v3i32__5_4_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v3i32__5_4_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3i32.v4i32.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3i32.v4i32.ll
index 92d6c95c26599..3d838b9952147 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3i32.v4i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3i32.v4i32.ll
@@ -99,36 +99,33 @@ define void @v_shuffle_v3i32_v4i32__2_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v4i32__2_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v4, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i32_v4i32__2_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v4, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v4i32__2_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: global_store_dwordx3 v4, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -141,12 +138,11 @@ define void @v_shuffle_v3i32_v4i32__3_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v4i32__3_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v4, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -236,36 +232,33 @@ define void @v_shuffle_v3i32_v4i32__6_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v4i32__6_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v4, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i32_v4i32__6_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v4, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v4i32__6_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: global_store_dwordx3 v4, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -279,12 +272,11 @@ define void @v_shuffle_v3i32_v4i32__7_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v4, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -322,16 +314,14 @@ define void @v_shuffle_v3i32_v4i32__7_0_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_0_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: global_store_dwordx3 v5, v[1:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v8, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -378,15 +368,14 @@ define void @v_shuffle_v3i32_v4i32__7_1_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_1_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v7, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -431,15 +420,14 @@ define void @v_shuffle_v3i32_v4i32__7_2_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_2_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: global_store_dwordx3 v7, v[1:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -485,16 +473,14 @@ define void @v_shuffle_v3i32_v4i32__7_3_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_3_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v5, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -508,9 +494,8 @@ define void @v_shuffle_v3i32_v4i32__7_3_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v7
+; GFX90A-NEXT: global_store_dwordx3 v8, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -524,9 +509,9 @@ define void @v_shuffle_v3i32_v4i32__7_3_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v7
+; GFX942-NEXT: global_store_dwordx3 v8, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -540,13 +525,12 @@ define void @v_shuffle_v3i32_v4i32__7_4_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_4_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx3 v5, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -586,12 +570,12 @@ define void @v_shuffle_v3i32_v4i32__7_5_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_5_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx3 v5, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -629,12 +613,12 @@ define void @v_shuffle_v3i32_v4i32__7_6_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_6_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx3 v5, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -674,13 +658,12 @@ define void @v_shuffle_v3i32_v4i32__7_7_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_7_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx3 v5, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -691,9 +674,8 @@ define void @v_shuffle_v3i32_v4i32__7_7_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v3
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx3 v4, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -704,9 +686,8 @@ define void @v_shuffle_v3i32_v4i32__7_7_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v3
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx3 v4, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -720,17 +701,15 @@ define void @v_shuffle_v3i32_v4i32__7_7_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_7_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx3 v5, v[1:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v9, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -740,14 +719,12 @@ define void @v_shuffle_v3i32_v4i32__7_7_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v8, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -757,15 +734,12 @@ define void @v_shuffle_v3i32_v4i32__7_7_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1]
+; GFX942-NEXT: global_store_dwordx3 v8, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -780,16 +754,14 @@ define void @v_shuffle_v3i32_v4i32__7_7_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v1
-; GFX900-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx3 v8, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -797,16 +769,15 @@ define void @v_shuffle_v3i32_v4i32__7_7_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v1
-; GFX90A-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: global_store_dwordx3 v8, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -814,17 +785,15 @@ define void @v_shuffle_v3i32_v4i32__7_7_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v1
-; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
+; GFX942-NEXT: global_store_dwordx3 v8, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -839,15 +808,14 @@ define void @v_shuffle_v3i32_v4i32__7_7_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: global_store_dwordx3 v7, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx3 v7, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -855,15 +823,14 @@ define void @v_shuffle_v3i32_v4i32__7_7_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -871,16 +838,15 @@ define void @v_shuffle_v3i32_v4i32__7_7_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -895,15 +861,14 @@ define void @v_shuffle_v3i32_v4i32__7_7_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v7
-; GFX900-NEXT: global_store_dwordx3 v8, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -911,16 +876,15 @@ define void @v_shuffle_v3i32_v4i32__7_7_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -928,16 +892,15 @@ define void @v_shuffle_v3i32_v4i32__7_7_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1]
+; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -951,43 +914,39 @@ define void @v_shuffle_v3i32_v4i32__7_7_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_7_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_7_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v3
-; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: global_store_dwordx3 v4, v[6:8], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_7_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v3
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: global_store_dwordx3 v4, v[6:8], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -1001,42 +960,39 @@ define void @v_shuffle_v3i32_v4i32__7_7_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_7_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v4, v1
-; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_7_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v3
-; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: v_mov_b32_e32 v8, v1
-; GFX90A-NEXT: global_store_dwordx3 v4, v[6:8], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_7_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v3
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: v_mov_b32_e32 v8, v1
-; GFX942-NEXT: global_store_dwordx3 v4, v[6:8], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -1050,13 +1006,13 @@ define void @v_shuffle_v3i32_v4i32__7_7_6(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_7_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1096,13 +1052,13 @@ define void @v_shuffle_v3i32_v4i32__7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_7_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1112,11 +1068,10 @@ define void @v_shuffle_v3i32_v4i32__7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v3
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1126,11 +1081,10 @@ define void @v_shuffle_v3i32_v4i32__7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v3
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -1145,38 +1099,36 @@ define void @v_shuffle_v3i32_v4i32__u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: global_store_dwordx3 v0, v[0:2], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i32_v4i32__u_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v1, v2
+; GFX90A-NEXT: global_store_dwordx3 v0, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v4i32__u_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v1, v2
+; GFX942-NEXT: global_store_dwordx3 v0, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -1247,29 +1199,27 @@ define void @v_shuffle_v3i32_v4i32__1_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-LABEL: v_shuffle_v3i32_v4i32__1_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v1
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: global_store_dwordx3 v4, v[6:8], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v3
+; GFX90A-NEXT: v_mov_b32_e32 v1, v2
+; GFX90A-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v4i32__1_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v1
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: global_store_dwordx3 v4, v[6:8], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v3
+; GFX942-NEXT: v_mov_b32_e32 v1, v2
+; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -1285,40 +1235,36 @@ define void @v_shuffle_v3i32_v4i32__2_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i32_v4i32__2_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: global_store_dwordx3 v4, v[6:8], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v4i32__2_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: global_store_dwordx3 v4, v[6:8], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -1331,43 +1277,40 @@ define void @v_shuffle_v3i32_v4i32__3_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v4i32__3_0_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i32_v4i32__3_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v3
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: global_store_dwordx3 v4, v[6:8], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: v_mov_b32_e32 v1, v2
+; GFX90A-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v4i32__3_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v3
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: global_store_dwordx3 v4, v[6:8], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: v_mov_b32_e32 v1, v2
+; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -1381,38 +1324,36 @@ define void @v_shuffle_v3i32_v4i32__4_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: global_store_dwordx3 v0, v[0:2], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i32_v4i32__4_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v1, v2
+; GFX90A-NEXT: global_store_dwordx3 v0, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v4i32__4_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v1, v2
+; GFX942-NEXT: global_store_dwordx3 v0, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -1442,16 +1383,15 @@ define void @v_shuffle_v3i32_v4i32__5_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: v_mov_b32_e32 v3, v0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: v_mov_b32_e32 v1, v2
+; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1459,17 +1399,15 @@ define void @v_shuffle_v3i32_v4i32__5_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: v_mov_b32_e32 v3, v0
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v1, v2
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -1486,14 +1424,12 @@ define void @v_shuffle_v3i32_v4i32__6_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx3 v5, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
+; GFX900-NEXT: global_store_dwordx3 v8, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1503,14 +1439,12 @@ define void @v_shuffle_v3i32_v4i32__6_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: global_store_dwordx3 v8, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1520,15 +1454,13 @@ define void @v_shuffle_v3i32_v4i32__6_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v0
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: global_store_dwordx3 v8, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -1543,16 +1475,14 @@ define void @v_shuffle_v3i32_v4i32__7_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx3 v5, v[1:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v8, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1560,16 +1490,15 @@ define void @v_shuffle_v3i32_v4i32__7_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v3, v0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v7
+; GFX90A-NEXT: v_mov_b32_e32 v1, v2
+; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1577,17 +1506,15 @@ define void @v_shuffle_v3i32_v4i32__7_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v3, v0
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v1, v2
+; GFX942-NEXT: v_mov_b32_e32 v0, v7
+; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -1601,16 +1528,14 @@ define void @v_shuffle_v3i32_v4i32__7_u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_u_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx3 v5, v[1:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v4, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1618,15 +1543,14 @@ define void @v_shuffle_v3i32_v4i32__7_u_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v1, 0
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v0, v7
+; GFX90A-NEXT: global_store_dwordx3 v1, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1634,16 +1558,15 @@ define void @v_shuffle_v3i32_v4i32__7_u_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v1, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v7
+; GFX942-NEXT: global_store_dwordx3 v1, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -1657,17 +1580,15 @@ define void @v_shuffle_v3i32_v4i32__7_1_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_1_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v6
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v9, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1675,16 +1596,15 @@ define void @v_shuffle_v3i32_v4i32__7_1_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v7
+; GFX90A-NEXT: v_mov_b32_e32 v1, v3
+; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1692,17 +1612,15 @@ define void @v_shuffle_v3i32_v4i32__7_1_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: v_mov_b32_e32 v0, v7
+; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -1716,50 +1634,48 @@ define void @v_shuffle_v3i32_v4i32__7_2_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_2_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v7
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx3 v7, v[1:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v9, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_2_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[6:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v7
-; GFX90A-NEXT: v_mov_b32_e32 v5, v2
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: global_store_dwordx3 v8, v[4:6], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v9
+; GFX90A-NEXT: v_mov_b32_e32 v1, v4
+; GFX90A-NEXT: global_store_dwordx3 v10, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_2_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[6:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, v7
-; GFX942-NEXT: v_mov_b32_e32 v5, v2
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v9
+; GFX942-NEXT: v_mov_b32_e32 v1, v4
+; GFX942-NEXT: global_store_dwordx3 v10, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -1773,17 +1689,15 @@ define void @v_shuffle_v3i32_v4i32__7_3_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_3_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v8
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx3 v8, v[1:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v9, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1829,17 +1743,15 @@ define void @v_shuffle_v3i32_v4i32__7_4_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_4_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v9, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1847,16 +1759,15 @@ define void @v_shuffle_v3i32_v4i32__7_4_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, v5
-; GFX90A-NEXT: v_mov_b32_e32 v9, v2
-; GFX90A-NEXT: v_mov_b32_e32 v10, v0
-; GFX90A-NEXT: global_store_dwordx3 v6, v[8:10], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v7
+; GFX90A-NEXT: v_mov_b32_e32 v1, v4
+; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1864,16 +1775,16 @@ define void @v_shuffle_v3i32_v4i32__7_4_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, v0
-; GFX942-NEXT: v_mov_b32_e32 v8, v5
-; GFX942-NEXT: v_mov_b32_e32 v9, v2
-; GFX942-NEXT: global_store_dwordx3 v6, v[8:10], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v7
+; GFX942-NEXT: v_mov_b32_e32 v1, v4
+; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -1887,16 +1798,15 @@ define void @v_shuffle_v3i32_v4i32__7_5_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_5_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx3 v5, v[1:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v9, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1943,16 +1853,15 @@ define void @v_shuffle_v3i32_v4i32__7_6_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_6_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v9, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1960,16 +1869,15 @@ define void @v_shuffle_v3i32_v4i32__7_6_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v7
+; GFX90A-NEXT: v_mov_b32_e32 v1, v6
+; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1977,17 +1885,16 @@ define void @v_shuffle_v3i32_v4i32__7_6_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v7
+; GFX942-NEXT: v_mov_b32_e32 v1, v6
+; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -2133,10 +2040,10 @@ define void @v_shuffle_v3i32_v4i32__2_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2146,10 +2053,10 @@ define void @v_shuffle_v3i32_v4i32__2_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2159,10 +2066,10 @@ define void @v_shuffle_v3i32_v4i32__2_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -2175,13 +2082,13 @@ define void @v_shuffle_v3i32_v4i32__3_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v4i32__3_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2319,15 +2226,14 @@ define void @v_shuffle_v3i32_v4i32__6_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
+; GFX900-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2335,15 +2241,15 @@ define void @v_shuffle_v3i32_v4i32__6_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v5
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: global_store_dwordx3 v8, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2351,16 +2257,15 @@ define void @v_shuffle_v3i32_v4i32__6_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
+; GFX942-NEXT: global_store_dwordx3 v8, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -2375,15 +2280,14 @@ define void @v_shuffle_v3i32_v4i32__7_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v7, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2430,16 +2334,14 @@ define void @v_shuffle_v3i32_v4i32__7_u_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_u_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v8, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2486,17 +2388,14 @@ define void @v_shuffle_v3i32_v4i32__7_0_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_0_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: v_mov_b32_e32 v4, v1
-; GFX900-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v8, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2546,16 +2445,14 @@ define void @v_shuffle_v3i32_v4i32__7_2_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v6
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v6
-; GFX900-NEXT: v_mov_b32_e32 v4, v2
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx3 v7, v[3:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v8, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2604,15 +2501,14 @@ define void @v_shuffle_v3i32_v4i32__7_3_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v7
-; GFX900-NEXT: v_mov_b32_e32 v4, v1
-; GFX900-NEXT: global_store_dwordx3 v8, v[2:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v7
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx3 v8, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2659,16 +2555,14 @@ define void @v_shuffle_v3i32_v4i32__7_4_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v2
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx3 v8, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2717,15 +2611,14 @@ define void @v_shuffle_v3i32_v4i32__7_5_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
; GFX900-NEXT: v_mov_b32_e32 v4, v1
-; GFX900-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v8, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2773,15 +2666,14 @@ define void @v_shuffle_v3i32_v4i32__7_6_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx3 v8, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2961,10 +2853,10 @@ define void @v_shuffle_v3i32_v4i32__2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2974,10 +2866,10 @@ define void @v_shuffle_v3i32_v4i32__2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2987,10 +2879,10 @@ define void @v_shuffle_v3i32_v4i32__2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -3003,13 +2895,13 @@ define void @v_shuffle_v3i32_v4i32__3_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v4i32__3_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3146,15 +3038,14 @@ define void @v_shuffle_v3i32_v4i32__6_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx3 v7, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
+; GFX900-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3162,15 +3053,14 @@ define void @v_shuffle_v3i32_v4i32__6_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3178,15 +3068,15 @@ define void @v_shuffle_v3i32_v4i32__6_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -3201,15 +3091,14 @@ define void @v_shuffle_v3i32_v4i32__7_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx3 v7, v[1:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3255,15 +3144,14 @@ define void @v_shuffle_v3i32_v4i32__7_u_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_u_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
-; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: global_store_dwordx3 v7, v[0:2], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx3 v7, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3309,16 +3197,14 @@ define void @v_shuffle_v3i32_v4i32__7_0_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v5, v6
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v6
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v2
-; GFX900-NEXT: global_store_dwordx3 v7, v[3:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v8, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3366,15 +3252,14 @@ define void @v_shuffle_v3i32_v4i32__7_1_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_1_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
-; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: global_store_dwordx3 v7, v[0:2], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx3 v7, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3420,15 +3305,14 @@ define void @v_shuffle_v3i32_v4i32__7_3_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v6
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v7, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3475,15 +3359,14 @@ define void @v_shuffle_v3i32_v4i32__7_4_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx3 v7, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx3 v7, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3531,14 +3414,13 @@ define void @v_shuffle_v3i32_v4i32__7_5_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
; GFX900-NEXT: global_store_dwordx3 v7, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
@@ -3586,15 +3468,14 @@ define void @v_shuffle_v3i32_v4i32__7_6_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v6, v2
-; GFX900-NEXT: global_store_dwordx3 v7, v[4:6], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx3 v7, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3641,38 +3522,37 @@ define void @v_shuffle_v3i32_v4i32__u_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v4i32__u_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i32_v4i32__u_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v4i32__u_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -3745,11 +3625,10 @@ define void @v_shuffle_v3i32_v4i32__1_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3759,11 +3638,10 @@ define void @v_shuffle_v3i32_v4i32__1_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -3776,41 +3654,37 @@ define void @v_shuffle_v3i32_v4i32__2_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v4i32__2_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i32_v4i32__2_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v4i32__2_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -3823,13 +3697,13 @@ define void @v_shuffle_v3i32_v4i32__3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v4i32__3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3839,11 +3713,10 @@ define void @v_shuffle_v3i32_v4i32__3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v3
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3853,11 +3726,10 @@ define void @v_shuffle_v3i32_v4i32__3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v3
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -3870,38 +3742,37 @@ define void @v_shuffle_v3i32_v4i32__4_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v4i32__4_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i32_v4i32__4_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v4i32__4_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -3933,14 +3804,13 @@ define void @v_shuffle_v3i32_v4i32__5_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v5
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v5
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: global_store_dwordx3 v8, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3950,14 +3820,13 @@ define void @v_shuffle_v3i32_v4i32__5_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, v5
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v5
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: global_store_dwordx3 v8, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -3972,15 +3841,14 @@ define void @v_shuffle_v3i32_v4i32__6_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx3 v8, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3988,16 +3856,15 @@ define void @v_shuffle_v3i32_v4i32__6_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v5
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4005,16 +3872,15 @@ define void @v_shuffle_v3i32_v4i32__6_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
+; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -4028,16 +3894,15 @@ define void @v_shuffle_v3i32_v4i32__7_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx3 v8, v[1:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4047,14 +3912,13 @@ define void @v_shuffle_v3i32_v4i32__7_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v7
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: global_store_dwordx3 v8, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4064,14 +3928,13 @@ define void @v_shuffle_v3i32_v4i32__7_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v7
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: global_store_dwordx3 v8, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -4085,15 +3948,14 @@ define void @v_shuffle_v3i32_v4i32__7_u_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_u_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx3 v8, v[1:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4140,15 +4002,14 @@ define void @v_shuffle_v3i32_v4i32__7_0_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v5, v7
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: global_store_dwordx3 v8, v[1:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v8, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4197,15 +4058,14 @@ define void @v_shuffle_v3i32_v4i32__7_1_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v5, v6
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v7, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4251,15 +4111,14 @@ define void @v_shuffle_v3i32_v4i32__7_2_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_2_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx3 v8, v[1:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4308,15 +4167,14 @@ define void @v_shuffle_v3i32_v4i32__7_4_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: global_store_dwordx3 v8, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4365,15 +4223,14 @@ define void @v_shuffle_v3i32_v4i32__7_5_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v7
-; GFX900-NEXT: v_mov_b32_e32 v6, v3
-; GFX900-NEXT: global_store_dwordx3 v8, v[4:6], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4420,15 +4277,14 @@ define void @v_shuffle_v3i32_v4i32__7_6_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: v_mov_b32_e32 v7, v3
-; GFX900-NEXT: global_store_dwordx3 v8, v[5:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4567,36 +4423,33 @@ define void @v_shuffle_v3i32_v4i32__2_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v4i32__2_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v4, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i32_v4i32__2_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v4, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v4i32__2_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: global_store_dwordx3 v4, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -4609,12 +4462,11 @@ define void @v_shuffle_v3i32_v4i32__3_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v4i32__3_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v4, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4675,29 +4527,27 @@ define void @v_shuffle_v3i32_v4i32__5_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-LABEL: v_shuffle_v3i32_v4i32__5_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v1
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: global_store_dwordx3 v4, v[6:8], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v3
+; GFX90A-NEXT: v_mov_b32_e32 v1, v2
+; GFX90A-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v4i32__5_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v1
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: global_store_dwordx3 v4, v[6:8], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v3
+; GFX942-NEXT: v_mov_b32_e32 v1, v2
+; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -4714,40 +4564,36 @@ define void @v_shuffle_v3i32_v4i32__6_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i32_v4i32__6_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: global_store_dwordx3 v4, v[6:8], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v4i32__6_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: global_store_dwordx3 v4, v[6:8], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -4761,43 +4607,40 @@ define void @v_shuffle_v3i32_v4i32__7_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v3
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: global_store_dwordx3 v4, v[6:8], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: v_mov_b32_e32 v1, v2
+; GFX90A-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v3
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: global_store_dwordx3 v4, v[6:8], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: v_mov_b32_e32 v1, v2
+; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -4811,40 +4654,37 @@ define void @v_shuffle_v3i32_v4i32__7_u_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_u_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx3 v4, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_u_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v1, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v3
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: global_store_dwordx3 v4, v[6:8], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: global_store_dwordx3 v1, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_u_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v1, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v3
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: global_store_dwordx3 v4, v[6:8], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: global_store_dwordx3 v1, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -4859,50 +4699,47 @@ define void @v_shuffle_v3i32_v4i32__7_0_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: v_mov_b32_e32 v4, v1
-; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx3 v8, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_0_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[6:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, v5
-; GFX90A-NEXT: v_mov_b32_e32 v9, v0
-; GFX90A-NEXT: v_mov_b32_e32 v10, v2
-; GFX90A-NEXT: global_store_dwordx3 v6, v[8:10], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: v_mov_b32_e32 v1, v6
+; GFX90A-NEXT: global_store_dwordx3 v10, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_0_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[6:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v9, v0
-; GFX942-NEXT: v_mov_b32_e32 v8, v5
-; GFX942-NEXT: v_mov_b32_e32 v10, v2
-; GFX942-NEXT: global_store_dwordx3 v6, v[8:10], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: v_mov_b32_e32 v1, v6
+; GFX942-NEXT: global_store_dwordx3 v10, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -4917,14 +4754,14 @@ define void @v_shuffle_v3i32_v4i32__7_1_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx3 v7, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4970,14 +4807,14 @@ define void @v_shuffle_v3i32_v4i32__7_2_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: global_store_dwordx3 v7, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4985,15 +4822,14 @@ define void @v_shuffle_v3i32_v4i32__7_2_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: v_mov_b32_e32 v1, v6
; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
@@ -5002,15 +4838,14 @@ define void @v_shuffle_v3i32_v4i32__7_2_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
+; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v1, v6
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -5026,16 +4861,14 @@ define void @v_shuffle_v3i32_v4i32__7_3_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5049,10 +4882,8 @@ define void @v_shuffle_v3i32_v4i32__7_3_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v7
+; GFX90A-NEXT: global_store_dwordx3 v8, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5066,10 +4897,9 @@ define void @v_shuffle_v3i32_v4i32__7_3_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v7
+; GFX942-NEXT: global_store_dwordx3 v8, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -5083,43 +4913,40 @@ define void @v_shuffle_v3i32_v4i32__7_5_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_5_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_5_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v3
-; GFX90A-NEXT: v_mov_b32_e32 v7, v1
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: global_store_dwordx3 v4, v[6:8], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: v_mov_b32_e32 v1, v3
+; GFX90A-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_5_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v3
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: global_store_dwordx3 v4, v[6:8], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -5133,42 +4960,40 @@ define void @v_shuffle_v3i32_v4i32__7_6_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_6_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i32_v4i32__7_6_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v3
-; GFX90A-NEXT: v_mov_b32_e32 v7, v2
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: global_store_dwordx3 v4, v[6:8], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: v_mov_b32_e32 v1, v4
+; GFX90A-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v4i32__7_6_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v3
-; GFX942-NEXT: v_mov_b32_e32 v7, v2
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: global_store_dwordx3 v4, v[6:8], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: v_mov_b32_e32 v1, v4
+; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -5342,9 +5167,8 @@ define void @v_shuffle_v3i32_v4i32__2_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: v_mov_b32_e32 v5, v4
-; GFX900-NEXT: global_store_dwordx3 v7, v[3:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
+; GFX900-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5352,15 +5176,15 @@ define void @v_shuffle_v3i32_v4i32__2_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v6, v5
-; GFX90A-NEXT: global_store_dwordx3 v8, v[4:6], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v5
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: global_store_dwordx3 v8, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5368,15 +5192,15 @@ define void @v_shuffle_v3i32_v4i32__2_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
+; GFX942-NEXT: global_store_dwordx3 v8, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -5394,12 +5218,11 @@ define void @v_shuffle_v3i32_v4i32__3_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v3
-; GFX900-NEXT: v_mov_b32_e32 v6, v5
-; GFX900-NEXT: global_store_dwordx3 v8, v[4:6], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v8, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5537,10 +5360,10 @@ define void @v_shuffle_v3i32_v4i32__6_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5550,10 +5373,10 @@ define void @v_shuffle_v3i32_v4i32__6_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5563,10 +5386,10 @@ define void @v_shuffle_v3i32_v4i32__6_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -5580,13 +5403,13 @@ define void @v_shuffle_v3i32_v4i32__7_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5626,13 +5449,12 @@ define void @v_shuffle_v3i32_v4i32__7_u_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_u_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx3 v4, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5673,16 +5495,14 @@ define void @v_shuffle_v3i32_v4i32__7_0_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v2
-; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx3 v8, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5731,15 +5551,14 @@ define void @v_shuffle_v3i32_v4i32__7_1_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx3 v7, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5787,15 +5606,14 @@ define void @v_shuffle_v3i32_v4i32__7_2_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx3 v7, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5844,16 +5662,14 @@ define void @v_shuffle_v3i32_v4i32__7_3_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
-; GFX900-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5863,14 +5679,13 @@ define void @v_shuffle_v3i32_v4i32__7_3_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v7
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: global_store_dwordx3 v8, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5880,14 +5695,13 @@ define void @v_shuffle_v3i32_v4i32__7_3_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v7
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
+; GFX942-NEXT: global_store_dwordx3 v8, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -5901,14 +5715,13 @@ define void @v_shuffle_v3i32_v4i32__7_4_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_4_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: v_mov_b32_e32 v4, v1
-; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6162,9 +5975,9 @@ define void @v_shuffle_v3i32_v4i32__2_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v2
-; GFX900-NEXT: v_mov_b32_e32 v6, v5
-; GFX900-NEXT: global_store_dwordx3 v7, v[4:6], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v5
+; GFX900-NEXT: v_mov_b32_e32 v4, v5
+; GFX900-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6172,15 +5985,15 @@ define void @v_shuffle_v3i32_v4i32__2_6_6(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v6
-; GFX90A-NEXT: global_store_dwordx3 v8, v[4:6], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v6
+; GFX90A-NEXT: v_mov_b32_e32 v4, v6
+; GFX90A-NEXT: global_store_dwordx3 v8, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6188,15 +6001,15 @@ define void @v_shuffle_v3i32_v4i32__2_6_6(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, v6
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v6
+; GFX942-NEXT: v_mov_b32_e32 v4, v6
+; GFX942-NEXT: global_store_dwordx3 v8, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -6214,12 +6027,12 @@ define void @v_shuffle_v3i32_v4i32__3_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v6
+; GFX900-NEXT: v_mov_b32_e32 v5, v6
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: v_mov_b32_e32 v7, v6
-; GFX900-NEXT: global_store_dwordx3 v8, v[5:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v8, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6356,10 +6169,10 @@ define void @v_shuffle_v3i32_v4i32__6_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6369,10 +6182,10 @@ define void @v_shuffle_v3i32_v4i32__6_6_6(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6382,10 +6195,10 @@ define void @v_shuffle_v3i32_v4i32__6_6_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -6399,13 +6212,13 @@ define void @v_shuffle_v3i32_v4i32__7_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6445,12 +6258,12 @@ define void @v_shuffle_v3i32_v4i32__7_u_6(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_u_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx3 v4, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6489,15 +6302,14 @@ define void @v_shuffle_v3i32_v4i32__7_0_6(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: global_store_dwordx3 v5, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx3 v8, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6545,15 +6357,14 @@ define void @v_shuffle_v3i32_v4i32__7_1_6(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx3 v7, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6601,15 +6412,14 @@ define void @v_shuffle_v3i32_v4i32__7_2_6(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx3 v7, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6656,15 +6466,14 @@ define void @v_shuffle_v3i32_v4i32__7_3_6(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v7
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: global_store_dwordx3 v8, v[4:6], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6672,15 +6481,15 @@ define void @v_shuffle_v3i32_v4i32__7_3_6(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v7
-; GFX90A-NEXT: v_mov_b32_e32 v5, v3
-; GFX90A-NEXT: global_store_dwordx3 v8, v[4:6], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v7
+; GFX90A-NEXT: v_mov_b32_e32 v4, v6
+; GFX90A-NEXT: global_store_dwordx3 v8, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6688,15 +6497,15 @@ define void @v_shuffle_v3i32_v4i32__7_3_6(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, v7
-; GFX942-NEXT: v_mov_b32_e32 v5, v3
-; GFX942-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v7
+; GFX942-NEXT: v_mov_b32_e32 v4, v6
+; GFX942-NEXT: global_store_dwordx3 v8, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -6759,12 +6568,13 @@ define void @v_shuffle_v3i32_v4i32__7_5_6(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_5_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6802,38 +6612,37 @@ define void @v_shuffle_v3i32_v4i32__u_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v4i32__u_7_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i32_v4i32__u_7_7:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v4i32__u_7_7:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -6920,16 +6729,15 @@ define void @v_shuffle_v3i32_v4i32__1_7_7(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: global_store_dwordx3 v8, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6937,16 +6745,15 @@ define void @v_shuffle_v3i32_v4i32__1_7_7(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: global_store_dwordx3 v8, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -6967,9 +6774,9 @@ define void @v_shuffle_v3i32_v4i32__2_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v2
-; GFX900-NEXT: v_mov_b32_e32 v5, v6
-; GFX900-NEXT: global_store_dwordx3 v7, v[4:6], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v6
+; GFX900-NEXT: v_mov_b32_e32 v4, v6
+; GFX900-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6979,14 +6786,13 @@ define void @v_shuffle_v3i32_v4i32__2_7_7(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: v_mov_b32_e32 v2, v7
-; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v7
+; GFX90A-NEXT: v_mov_b32_e32 v4, v7
+; GFX90A-NEXT: global_store_dwordx3 v8, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6996,14 +6802,13 @@ define void @v_shuffle_v3i32_v4i32__2_7_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: v_mov_b32_e32 v2, v7
-; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v7
+; GFX942-NEXT: v_mov_b32_e32 v4, v7
+; GFX942-NEXT: global_store_dwordx3 v8, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -7021,12 +6826,12 @@ define void @v_shuffle_v3i32_v4i32__3_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v7
+; GFX900-NEXT: v_mov_b32_e32 v5, v7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: v_mov_b32_e32 v6, v7
-; GFX900-NEXT: global_store_dwordx3 v8, v[5:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v8, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7034,16 +6839,15 @@ define void @v_shuffle_v3i32_v4i32__3_7_7(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v3
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: v_mov_b32_e32 v2, v7
-; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v5
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7051,16 +6855,16 @@ define void @v_shuffle_v3i32_v4i32__3_7_7(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v0, v3
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: v_mov_b32_e32 v2, v7
-; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v5
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -7135,11 +6939,10 @@ define void @v_shuffle_v3i32_v4i32__5_7_7(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7149,11 +6952,10 @@ define void @v_shuffle_v3i32_v4i32__5_7_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -7167,41 +6969,37 @@ define void @v_shuffle_v3i32_v4i32__6_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v4i32__6_7_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i32_v4i32__6_7_7:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i32_v4i32__6_7_7:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -7215,12 +7013,12 @@ define void @v_shuffle_v3i32_v4i32__7_u_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_u_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx3 v4, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7261,15 +7059,14 @@ define void @v_shuffle_v3i32_v4i32__7_0_7(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx3 v8, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7319,15 +7116,14 @@ define void @v_shuffle_v3i32_v4i32__7_1_7(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
-; GFX900-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx3 v7, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7375,15 +7171,14 @@ define void @v_shuffle_v3i32_v4i32__7_2_7(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v6
-; GFX900-NEXT: global_store_dwordx3 v7, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7432,15 +7227,14 @@ define void @v_shuffle_v3i32_v4i32__7_3_7(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: v_mov_b32_e32 v6, v3
-; GFX900-NEXT: global_store_dwordx3 v8, v[5:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7450,14 +7244,13 @@ define void @v_shuffle_v3i32_v4i32__7_3_7(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v7
-; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v7
+; GFX90A-NEXT: global_store_dwordx3 v8, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7467,14 +7260,13 @@ define void @v_shuffle_v3i32_v4i32__7_3_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v7
-; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v7
+; GFX942-NEXT: global_store_dwordx3 v8, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -7488,13 +7280,13 @@ define void @v_shuffle_v3i32_v4i32__7_4_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_4_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7537,13 +7329,13 @@ define void @v_shuffle_v3i32_v4i32__7_5_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_5_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7583,12 +7375,13 @@ define void @v_shuffle_v3i32_v4i32__7_6_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i32_v4i32__7_6_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v2i64.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v2i64.ll
index bbca5039bb02c..82ec200dae107 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v2i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v2i64.ll
@@ -58,39 +58,33 @@ define void @v_shuffle_v3i64_v2i64__1_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v2i64__1_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v2i64__1_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v2i64__1_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -114,39 +108,33 @@ define void @v_shuffle_v3i64_v2i64__3_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v2i64__3_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v2i64__3_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v2i64__3_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -160,55 +148,42 @@ define void @v_shuffle_v3i64_v2i64__3_0_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v2i64__3_0_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v2i64__3_0_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v2i64__3_0_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -222,49 +197,43 @@ define void @v_shuffle_v3i64_v2i64__3_1_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v2i64__3_1_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v2i64__3_1_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v2i64__3_1_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -291,31 +260,27 @@ define void @v_shuffle_v3i64_v2i64__3_2_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-LABEL: v_shuffle_v3i64_v2i64__3_2_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v2i64__3_2_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -329,39 +294,40 @@ define void @v_shuffle_v3i64_v2i64__3_3_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v2i64__3_3_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v2i64__3_3_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v2i64__3_3_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -375,51 +341,51 @@ define void @v_shuffle_v3i64_v2i64__3_3_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v2i64__3_3_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v2i64__3_3_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v2i64__3_3_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -433,51 +399,52 @@ define void @v_shuffle_v3i64_v2i64__3_3_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v2i64__3_3_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v6, v[4:5], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v2i64__3_3_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v6, v[4:5], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v2i64__3_3_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
+; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -491,42 +458,42 @@ define void @v_shuffle_v3i64_v2i64__3_3_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v2i64__3_3_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v2i64__3_3_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v2i64__3_3_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -540,42 +507,42 @@ define void @v_shuffle_v3i64_v2i64__3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v2i64__3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx2 v6, v[2:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v2i64__3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx2 v6, v[2:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v2i64__3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -589,42 +556,36 @@ define void @v_shuffle_v3i64_v2i64__u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v2i64__u_0_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v0, v[2:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v2i64__u_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v0, v[2:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v2i64__u_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v0, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -699,32 +660,28 @@ define void @v_shuffle_v3i64_v2i64__1_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-LABEL: v_shuffle_v3i64_v2i64__1_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v1
-; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v2i64__1_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -737,42 +694,36 @@ define void @v_shuffle_v3i64_v2i64__2_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v2i64__2_0_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v0, v[2:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v2i64__2_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v0, v[2:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v2i64__2_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v0, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -785,57 +736,45 @@ define void @v_shuffle_v3i64_v2i64__3_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v2i64__3_0_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v2i64__3_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v2i64__3_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -849,52 +788,45 @@ define void @v_shuffle_v3i64_v2i64__3_u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v2i64__3_u_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v2i64__3_u_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v2i64__3_u_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -910,15 +842,15 @@ define void @v_shuffle_v3i64_v2i64__3_1_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v4, v6
+; GFX900-NEXT: v_mov_b32_e32 v5, v7
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -927,15 +859,15 @@ define void @v_shuffle_v3i64_v2i64__3_1_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v4, v6
+; GFX90A-NEXT: v_mov_b32_e32 v5, v7
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -944,15 +876,15 @@ define void @v_shuffle_v3i64_v2i64__3_1_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v4, v6
+; GFX942-NEXT: v_mov_b32_e32 v5, v7
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -966,52 +898,51 @@ define void @v_shuffle_v3i64_v2i64__3_2_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v2i64__3_2_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v2i64__3_2_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v2i64__3_2_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -1109,42 +1040,42 @@ define void @v_shuffle_v3i64_v2i64__1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v2i64__1_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx2 v6, v[2:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v2i64__1_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx2 v6, v[2:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v2i64__1_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -1200,16 +1131,14 @@ define void @v_shuffle_v3i64_v2i64__3_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v6, v[4:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1217,16 +1146,14 @@ define void @v_shuffle_v3i64_v2i64__3_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v6, v[4:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1234,16 +1161,14 @@ define void @v_shuffle_v3i64_v2i64__3_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -1258,16 +1183,14 @@ define void @v_shuffle_v3i64_v2i64__3_u_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v6, v[4:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1275,16 +1198,14 @@ define void @v_shuffle_v3i64_v2i64__3_u_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v6, v[4:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1292,16 +1213,14 @@ define void @v_shuffle_v3i64_v2i64__3_u_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -1317,16 +1236,12 @@ define void @v_shuffle_v3i64_v2i64__3_0_1(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx2 v8, v[6:7], s[16:17] offset:16
; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
@@ -1336,16 +1251,12 @@ define void @v_shuffle_v3i64_v2i64__3_0_1(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx2 v8, v[6:7], s[16:17] offset:16
; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
@@ -1355,16 +1266,12 @@ define void @v_shuffle_v3i64_v2i64__3_0_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1379,57 +1286,52 @@ define void @v_shuffle_v3i64_v2i64__3_2_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v2i64__3_2_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v6, v[4:5], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v2i64__3_2_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v6, v[4:5], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v2i64__3_2_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -1493,39 +1395,33 @@ define void @v_shuffle_v3i64_v2i64__1_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v2i64__1_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v2i64__1_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v2i64__1_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -1563,32 +1459,28 @@ define void @v_shuffle_v3i64_v2i64__3_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A-LABEL: v_shuffle_v3i64_v2i64__3_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v1
-; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v2i64__3_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -1607,9 +1499,7 @@ define void @v_shuffle_v3i64_v2i64__3_u_2(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1621,9 +1511,7 @@ define void @v_shuffle_v3i64_v2i64__3_u_2(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1635,9 +1523,7 @@ define void @v_shuffle_v3i64_v2i64__3_u_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -1651,57 +1537,45 @@ define void @v_shuffle_v3i64_v2i64__3_0_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v2i64__3_0_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v6, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v2i64__3_0_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v6, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v2i64__3_0_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -1716,16 +1590,14 @@ define void @v_shuffle_v3i64_v2i64__3_1_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1733,16 +1605,14 @@ define void @v_shuffle_v3i64_v2i64__3_1_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1750,16 +1620,14 @@ define void @v_shuffle_v3i64_v2i64__3_1_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -1878,13 +1746,13 @@ define void @v_shuffle_v3i64_v2i64__1_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v6
+; GFX900-NEXT: v_mov_b32_e32 v5, v7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v2
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
; GFX900-NEXT: global_store_dwordx2 v8, v[6:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1895,13 +1763,13 @@ define void @v_shuffle_v3i64_v2i64__1_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v6
+; GFX90A-NEXT: v_mov_b32_e32 v5, v7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v3
; GFX90A-NEXT: global_store_dwordx2 v8, v[6:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1912,13 +1780,13 @@ define void @v_shuffle_v3i64_v2i64__1_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v6
+; GFX942-NEXT: v_mov_b32_e32 v5, v7
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v3
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -1975,42 +1843,36 @@ define void @v_shuffle_v3i64_v2i64__3_u_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v2i64__3_u_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
; GFX900-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v2i64__3_u_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v2i64__3_u_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -2024,57 +1886,45 @@ define void @v_shuffle_v3i64_v2i64__3_0_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v2i64__3_0_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v6, v[4:5], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v2i64__3_0_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v6, v[4:5], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v2i64__3_0_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -2089,16 +1939,14 @@ define void @v_shuffle_v3i64_v2i64__3_1_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[6:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v6, v[2:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2106,16 +1954,14 @@ define void @v_shuffle_v3i64_v2i64__3_1_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx2 v8, v[6:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v6, v[2:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2123,16 +1969,14 @@ define void @v_shuffle_v3i64_v2i64__3_1_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -2160,32 +2004,28 @@ define void @v_shuffle_v3i64_v2i64__3_2_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-LABEL: v_shuffle_v3i64_v2i64__3_2_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v1
-; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx2 v6, v[2:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v2i64__3_2_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -2543,59 +2383,21 @@ define void @s_shuffle_v3i64_v2i64__3_3_u() {
}
define void @s_shuffle_v3i64_v2i64__3_3_0() {
-; GFX900-LABEL: s_shuffle_v3i64_v2i64__3_3_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:7]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3i64_v2i64__3_3_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:7]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3i64_v2i64__3_3_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:3]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v3i64_v2i64__3_3_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[8:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%vec1 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <3 x i32> <i32 3, i32 3, i32 0>
@@ -2665,56 +2467,20 @@ define void @s_shuffle_v3i64_v2i64__3_3_1() {
}
define void @s_shuffle_v3i64_v2i64__3_3_2() {
-; GFX900-LABEL: s_shuffle_v3i64_v2i64__3_3_2:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:7]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s10, s6
-; GFX900-NEXT: s_mov_b32 s11, s7
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3i64_v2i64__3_3_2:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:7]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s10, s6
-; GFX90A-NEXT: s_mov_b32 s11, s7
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3i64_v2i64__3_3_2:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:3]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v3i64_v2i64__3_3_2:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s14
+; GFX9-NEXT: s_mov_b32 s9, s15
+; GFX9-NEXT: s_mov_b32 s10, s14
+; GFX9-NEXT: s_mov_b32 s11, s15
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%vec1 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <3 x i32> <i32 3, i32 3, i32 2>
@@ -2745,50 +2511,18 @@ define void @s_shuffle_v3i64_v2i64__3_3_3() {
}
define void @s_shuffle_v3i64_v2i64__u_0_0() {
-; GFX900-LABEL: s_shuffle_v3i64_v2i64__u_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:7]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3i64_v2i64__u_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:7]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3i64_v2i64__u_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:3]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v3i64_v2i64__u_0_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <3 x i32> <i32 poison, i32 0, i32 0>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
@@ -2817,56 +2551,20 @@ define void @s_shuffle_v3i64_v2i64__0_0_0() {
}
define void @s_shuffle_v3i64_v2i64__1_0_0() {
-; GFX900-LABEL: s_shuffle_v3i64_v2i64__1_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:7]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3i64_v2i64__1_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:7]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3i64_v2i64__1_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:3]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v3i64_v2i64__1_0_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s14
+; GFX9-NEXT: s_mov_b32 s9, s15
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <3 x i32> <i32 1, i32 0, i32 0>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
@@ -2874,50 +2572,18 @@ define void @s_shuffle_v3i64_v2i64__1_0_0() {
}
define void @s_shuffle_v3i64_v2i64__2_0_0() {
-; GFX900-LABEL: s_shuffle_v3i64_v2i64__2_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:7]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3i64_v2i64__2_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:7]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3i64_v2i64__2_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:3]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v3i64_v2i64__2_0_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <3 x i32> <i32 2, i32 0, i32 0>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
@@ -2929,17 +2595,15 @@ define void @s_shuffle_v3i64_v2i64__3_0_0() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ; def s[12:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -2949,17 +2613,15 @@ define void @s_shuffle_v3i64_v2i64__3_0_0() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ; def s[12:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -2969,17 +2631,15 @@ define void @s_shuffle_v3i64_v2i64__3_0_0() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:3]
+; GFX942-NEXT: ; def s[12:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:7]
+; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b32 s8, s2
+; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b32 s10, s12
+; GFX942-NEXT: s_mov_b32 s11, s13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -2996,15 +2656,13 @@ define void @s_shuffle_v3i64_v2i64__3_u_0() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ; def s[12:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -3014,15 +2672,13 @@ define void @s_shuffle_v3i64_v2i64__3_u_0() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ; def s[12:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -3032,15 +2688,13 @@ define void @s_shuffle_v3i64_v2i64__3_u_0() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:3]
+; GFX942-NEXT: ; def s[12:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:7]
+; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b32 s8, s2
+; GFX942-NEXT: s_mov_b32 s9, s3
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -3057,17 +2711,15 @@ define void @s_shuffle_v3i64_v2i64__3_1_0() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ; def s[12:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s6
-; GFX900-NEXT: s_mov_b32 s11, s7
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -3077,17 +2729,15 @@ define void @s_shuffle_v3i64_v2i64__3_1_0() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ; def s[12:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s6
-; GFX90A-NEXT: s_mov_b32 s11, s7
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -3097,17 +2747,15 @@ define void @s_shuffle_v3i64_v2i64__3_1_0() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:3]
+; GFX942-NEXT: ; def s[12:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:7]
+; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b32 s8, s2
+; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b32 s10, s14
+; GFX942-NEXT: s_mov_b32 s11, s15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -3129,12 +2777,10 @@ define void @s_shuffle_v3i64_v2i64__3_2_0() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -3149,12 +2795,10 @@ define void @s_shuffle_v3i64_v2i64__3_2_0() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -3164,17 +2808,15 @@ define void @s_shuffle_v3i64_v2i64__3_2_0() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:3]
+; GFX942-NEXT: ; def s[12:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:7]
+; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b32 s8, s2
+; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b32 s11, s1
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -3634,56 +3276,20 @@ define void @s_shuffle_v3i64_v2i64__2_2_2() {
}
define void @s_shuffle_v3i64_v2i64__3_2_2() {
-; GFX900-LABEL: s_shuffle_v3i64_v2i64__3_2_2:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:7]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3i64_v2i64__3_2_2:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:7]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3i64_v2i64__3_2_2:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:3]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v3i64_v2i64__3_2_2:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s14
+; GFX9-NEXT: s_mov_b32 s9, s15
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%vec1 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <3 x i32> <i32 3, i32 2, i32 2>
@@ -3692,50 +3298,18 @@ define void @s_shuffle_v3i64_v2i64__3_2_2() {
}
define void @s_shuffle_v3i64_v2i64__3_u_2() {
-; GFX900-LABEL: s_shuffle_v3i64_v2i64__3_u_2:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:7]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3i64_v2i64__3_u_2:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:7]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3i64_v2i64__3_u_2:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:3]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v3i64_v2i64__3_u_2:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s14
+; GFX9-NEXT: s_mov_b32 s9, s15
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%vec1 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <3 x i32> <i32 3, i32 poison, i32 2>
@@ -3787,14 +3361,12 @@ define void @s_shuffle_v3i64_v2i64__3_0_2() {
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:7]
+; GFX942-NEXT: ; def s[12:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s8, s14
+; GFX942-NEXT: s_mov_b32 s9, s15
; GFX942-NEXT: s_mov_b32 s10, s0
; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -3807,59 +3379,21 @@ define void @s_shuffle_v3i64_v2i64__3_0_2() {
}
define void @s_shuffle_v3i64_v2i64__3_1_2() {
-; GFX900-LABEL: s_shuffle_v3i64_v2i64__3_1_2:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:7]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3i64_v2i64__3_1_2:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:7]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3i64_v2i64__3_1_2:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:3]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v3i64_v2i64__3_1_2:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[8:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s14
+; GFX9-NEXT: s_mov_b32 s9, s15
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%vec1 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <3 x i32> <i32 3, i32 1, i32 2>
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v3i64.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v3i64.ll
index f15dd7d2772e5..2fca4c2f1ff3a 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v3i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v3i64.ll
@@ -100,39 +100,33 @@ define void @v_shuffle_v3i64_v3i64__2_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v3i64__2_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v3i64__2_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v3i64__2_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -196,39 +190,33 @@ define void @v_shuffle_v3i64_v3i64__5_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -242,55 +230,42 @@ define void @v_shuffle_v3i64_v3i64__5_0_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_0_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:7]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_0_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:7]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_0_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:7]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -304,49 +279,43 @@ define void @v_shuffle_v3i64_v3i64__5_1_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_1_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
-; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v8
-; GFX900-NEXT: v_mov_b32_e32 v1, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_1_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
-; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v8
-; GFX90A-NEXT: v_mov_b32_e32 v1, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_1_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
-; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v8
-; GFX942-NEXT: v_mov_b32_e32 v1, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -360,49 +329,43 @@ define void @v_shuffle_v3i64_v3i64__5_2_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_2_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[2:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:11]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, v10
-; GFX900-NEXT: v_mov_b32_e32 v3, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_2_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[2:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:11]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, v10
-; GFX90A-NEXT: v_mov_b32_e32 v3, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_2_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:11]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v10
-; GFX942-NEXT: v_mov_b32_e32 v3, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -416,45 +379,40 @@ define void @v_shuffle_v3i64_v3i64__5_3_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_3_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: v_mov_b32_e32 v7, v1
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_3_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v0
+; GFX90A-NEXT: v_mov_b32_e32 v7, v1
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_3_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v0
+; GFX942-NEXT: v_mov_b32_e32 v7, v1
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -468,39 +426,40 @@ define void @v_shuffle_v3i64_v3i64__5_4_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_4_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: v_mov_b32_e32 v7, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_4_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v2
+; GFX90A-NEXT: v_mov_b32_e32 v7, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_4_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v2
+; GFX942-NEXT: v_mov_b32_e32 v7, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -514,39 +473,40 @@ define void @v_shuffle_v3i64_v3i64__5_5_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_5_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_5_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_5_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -560,51 +520,51 @@ define void @v_shuffle_v3i64_v3i64__5_5_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_5_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:7]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v12, v[6:7], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_5_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:7]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v12, v[6:7], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_5_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:7]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v12, v[6:7], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -618,51 +578,52 @@ define void @v_shuffle_v3i64_v3i64__5_5_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_5_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:9]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v8
-; GFX900-NEXT: v_mov_b32_e32 v7, v9
-; GFX900-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v10, v[6:7], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_5_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:9]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v8
-; GFX90A-NEXT: v_mov_b32_e32 v7, v9
-; GFX90A-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v10, v[6:7], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_5_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
-; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v6, v8
-; GFX942-NEXT: v_mov_b32_e32 v7, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v10, v[6:7], s[0:1] offset:16
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -676,51 +637,52 @@ define void @v_shuffle_v3i64_v3i64__5_5_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_5_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:11]
+; GFX900-NEXT: ; def v[2:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v8, v10
-; GFX900-NEXT: v_mov_b32_e32 v9, v11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v12, v[4:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v8, v[6:7], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_5_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:11]
+; GFX90A-NEXT: ; def v[2:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v10
-; GFX90A-NEXT: v_mov_b32_e32 v9, v11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v12, v[4:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v8, v[6:7], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_5_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:11]
+; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v10
-; GFX942-NEXT: v_mov_b32_e32 v9, v11
+; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v12, v[4:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -734,42 +696,42 @@ define void @v_shuffle_v3i64_v3i64__5_5_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_5_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_5_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_5_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -783,42 +745,42 @@ define void @v_shuffle_v3i64_v3i64__5_5_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_5_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v6, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_5_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v6, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_5_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -832,42 +794,42 @@ define void @v_shuffle_v3i64_v3i64__5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx2 v6, v[4:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx2 v6, v[4:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -881,42 +843,36 @@ define void @v_shuffle_v3i64_v3i64__u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v3i64__u_0_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[2:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v0, v[2:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v3i64__u_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[2:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v0, v[2:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v3i64__u_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v0, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -1025,48 +981,42 @@ define void @v_shuffle_v3i64_v3i64__2_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v3i64__2_0_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: v_mov_b32_e32 v7, v1
+; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v3i64__2_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v0
+; GFX90A-NEXT: v_mov_b32_e32 v7, v1
+; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v3i64__2_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v6, v0
+; GFX942-NEXT: v_mov_b32_e32 v7, v1
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -1079,42 +1029,36 @@ define void @v_shuffle_v3i64_v3i64__3_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v3i64__3_0_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[2:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v0, v[2:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v3i64__3_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[2:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v0, v[2:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v3i64__3_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v0, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -1185,57 +1129,45 @@ define void @v_shuffle_v3i64_v3i64__5_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_0_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:7]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v12, v[6:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:7]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v12, v[6:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:7]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v12, v[6:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -1249,52 +1181,45 @@ define void @v_shuffle_v3i64_v3i64__5_u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_u_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:7]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v12, v[6:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_u_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:7]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v12, v[6:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_u_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:7]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v12, v[6:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -1308,52 +1233,51 @@ define void @v_shuffle_v3i64_v3i64__5_1_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_1_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:9]
+; GFX900-NEXT: global_store_dwordx2 v12, v[6:7], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v6, v8
+; GFX900-NEXT: v_mov_b32_e32 v7, v9
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v10, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v0, v8
-; GFX900-NEXT: v_mov_b32_e32 v1, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_1_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: global_store_dwordx2 v12, v[6:7], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v6, v8
+; GFX90A-NEXT: v_mov_b32_e32 v7, v9
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:9]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v10, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v0, v8
-; GFX90A-NEXT: v_mov_b32_e32 v1, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_1_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx2 v12, v[6:7], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v6, v8
+; GFX942-NEXT: v_mov_b32_e32 v7, v9
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:9]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v8
-; GFX942-NEXT: v_mov_b32_e32 v1, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -1367,51 +1291,51 @@ define void @v_shuffle_v3i64_v3i64__5_2_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_2_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
-; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, v10
-; GFX900-NEXT: v_mov_b32_e32 v3, v11
-; GFX900-NEXT: global_store_dwordx2 v12, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v12, v[6:7], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v6, v10
+; GFX900-NEXT: v_mov_b32_e32 v7, v11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_2_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
-; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, v10
-; GFX90A-NEXT: v_mov_b32_e32 v3, v11
-; GFX90A-NEXT: global_store_dwordx2 v12, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v12, v[6:7], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v6, v10
+; GFX90A-NEXT: v_mov_b32_e32 v7, v11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_2_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
-; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v12, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v10
-; GFX942-NEXT: v_mov_b32_e32 v3, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v12, v[6:7], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v6, v10
+; GFX942-NEXT: v_mov_b32_e32 v7, v11
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -1425,52 +1349,51 @@ define void @v_shuffle_v3i64_v3i64__5_3_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_3_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:7]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v12, v[6:7], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: v_mov_b32_e32 v7, v1
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_3_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:7]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v12, v[6:7], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v6, v0
+; GFX90A-NEXT: v_mov_b32_e32 v7, v1
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_3_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:7]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v12, v[6:7], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v6, v0
+; GFX942-NEXT: v_mov_b32_e32 v7, v1
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -1484,51 +1407,51 @@ define void @v_shuffle_v3i64_v3i64__5_4_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_4_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:7]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v12, v[6:7], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: v_mov_b32_e32 v7, v3
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_4_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:7]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v12, v[6:7], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v6, v2
+; GFX90A-NEXT: v_mov_b32_e32 v7, v3
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_4_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:7]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v12, v[6:7], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v6, v2
+; GFX942-NEXT: v_mov_b32_e32 v7, v3
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -1674,42 +1597,42 @@ define void @v_shuffle_v3i64_v3i64__2_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v3i64__2_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx2 v6, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: v_mov_b32_e32 v7, v3
+; GFX900-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v3i64__2_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx2 v6, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v2
+; GFX90A-NEXT: v_mov_b32_e32 v7, v3
+; GFX90A-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v3i64__2_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v6, v2
+; GFX942-NEXT: v_mov_b32_e32 v7, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -1823,16 +1746,14 @@ define void @v_shuffle_v3i64_v3i64__5_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:9]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v8
-; GFX900-NEXT: v_mov_b32_e32 v1, v9
-; GFX900-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v10, v[6:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1840,16 +1761,14 @@ define void @v_shuffle_v3i64_v3i64__5_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:9]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v8
-; GFX90A-NEXT: v_mov_b32_e32 v1, v9
-; GFX90A-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v10, v[6:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1857,16 +1776,14 @@ define void @v_shuffle_v3i64_v3i64__5_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:9]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v0, v8
-; GFX942-NEXT: v_mov_b32_e32 v1, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v10, v[6:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -1881,16 +1798,14 @@ define void @v_shuffle_v3i64_v3i64__5_u_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:9]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v8
-; GFX900-NEXT: v_mov_b32_e32 v1, v9
-; GFX900-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v10, v[6:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1898,16 +1813,14 @@ define void @v_shuffle_v3i64_v3i64__5_u_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:9]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v8
-; GFX90A-NEXT: v_mov_b32_e32 v1, v9
-; GFX90A-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v10, v[6:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1915,16 +1828,14 @@ define void @v_shuffle_v3i64_v3i64__5_u_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:9]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v0, v8
-; GFX942-NEXT: v_mov_b32_e32 v1, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v10, v[6:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -1938,57 +1849,45 @@ define void @v_shuffle_v3i64_v3i64__5_0_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_0_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:9]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v2, v8
-; GFX900-NEXT: v_mov_b32_e32 v3, v9
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v12, v[8:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_0_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:9]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v2, v8
-; GFX90A-NEXT: v_mov_b32_e32 v3, v9
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v12, v[8:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_0_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:9]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v8
-; GFX942-NEXT: v_mov_b32_e32 v3, v9
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v12, v[8:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -2002,51 +1901,51 @@ define void @v_shuffle_v3i64_v3i64__5_2_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_2_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[4:9]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx2 v10, v[6:7], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v6, v8
+; GFX900-NEXT: v_mov_b32_e32 v7, v9
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:11]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v12, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v2, v10
-; GFX900-NEXT: v_mov_b32_e32 v3, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_2_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[4:9]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: global_store_dwordx2 v10, v[6:7], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v6, v8
+; GFX90A-NEXT: v_mov_b32_e32 v7, v9
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:11]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v12, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v2, v10
-; GFX90A-NEXT: v_mov_b32_e32 v3, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_2_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx2 v10, v[6:7], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v6, v8
+; GFX942-NEXT: v_mov_b32_e32 v7, v9
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:11]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v10
-; GFX942-NEXT: v_mov_b32_e32 v3, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -2062,18 +1961,16 @@ define void @v_shuffle_v3i64_v3i64__5_3_1(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:9]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v0, v8
-; GFX900-NEXT: v_mov_b32_e32 v1, v9
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
-; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: global_store_dwordx2 v10, v[6:7], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: v_mov_b32_e32 v7, v1
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_3_1:
@@ -2081,17 +1978,15 @@ define void @v_shuffle_v3i64_v3i64__5_3_1(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:9]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v0, v8
-; GFX90A-NEXT: v_mov_b32_e32 v1, v9
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v10, v[6:7], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v6, v0
+; GFX90A-NEXT: v_mov_b32_e32 v7, v1
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2100,18 +1995,16 @@ define void @v_shuffle_v3i64_v3i64__5_3_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx2 v10, v[6:7], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:9]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v8
-; GFX942-NEXT: v_mov_b32_e32 v1, v9
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, v0
+; GFX942-NEXT: v_mov_b32_e32 v7, v1
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -2125,16 +2018,16 @@ define void @v_shuffle_v3i64_v3i64__5_4_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_4_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:9]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v8
-; GFX900-NEXT: v_mov_b32_e32 v5, v9
-; GFX900-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx2 v10, v[6:7], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: v_mov_b32_e32 v7, v3
; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
@@ -2142,16 +2035,16 @@ define void @v_shuffle_v3i64_v3i64__5_4_1(ptr addrspace(1) inreg %ptr) {
; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_4_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:9]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v8
-; GFX90A-NEXT: v_mov_b32_e32 v5, v9
-; GFX90A-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx2 v10, v[6:7], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v6, v2
+; GFX90A-NEXT: v_mov_b32_e32 v7, v3
; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
@@ -2159,16 +2052,17 @@ define void @v_shuffle_v3i64_v3i64__5_4_1(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_4_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
-; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v4, v8
-; GFX942-NEXT: v_mov_b32_e32 v5, v9
+; GFX942-NEXT: global_store_dwordx2 v10, v[6:7], s[0:1] offset:16
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v2
+; GFX942-NEXT: v_mov_b32_e32 v7, v3
; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2315,42 +2209,42 @@ define void @v_shuffle_v3i64_v3i64__2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v3i64__2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx2 v6, v[4:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v3i64__2_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx2 v6, v[4:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v3i64__2_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -2464,16 +2358,14 @@ define void @v_shuffle_v3i64_v3i64__5_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[2:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:11]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, v10
-; GFX900-NEXT: v_mov_b32_e32 v3, v11
-; GFX900-NEXT: global_store_dwordx2 v12, v[4:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v8, v[6:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2481,16 +2373,14 @@ define void @v_shuffle_v3i64_v3i64__5_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[2:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:11]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, v10
-; GFX90A-NEXT: v_mov_b32_e32 v3, v11
-; GFX90A-NEXT: global_store_dwordx2 v12, v[4:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v8, v[6:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2498,16 +2388,14 @@ define void @v_shuffle_v3i64_v3i64__5_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:11]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v12, v[4:5], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v10
-; GFX942-NEXT: v_mov_b32_e32 v3, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -2522,16 +2410,14 @@ define void @v_shuffle_v3i64_v3i64__5_u_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[2:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:11]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v10
-; GFX900-NEXT: v_mov_b32_e32 v1, v11
-; GFX900-NEXT: global_store_dwordx2 v12, v[4:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v8, v[6:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2539,16 +2425,14 @@ define void @v_shuffle_v3i64_v3i64__5_u_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[2:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:11]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v10
-; GFX90A-NEXT: v_mov_b32_e32 v1, v11
-; GFX90A-NEXT: global_store_dwordx2 v12, v[4:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v8, v[6:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2556,16 +2440,14 @@ define void @v_shuffle_v3i64_v3i64__5_u_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:11]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v12, v[4:5], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v0, v10
-; GFX942-NEXT: v_mov_b32_e32 v1, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -2581,17 +2463,13 @@ define void @v_shuffle_v3i64_v3i64__5_0_2(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:11]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v12, v[4:5], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v2, v10
-; GFX900-NEXT: v_mov_b32_e32 v3, v11
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v12, v[10:11], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2600,17 +2478,13 @@ define void @v_shuffle_v3i64_v3i64__5_0_2(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:11]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v12, v[4:5], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v2, v10
-; GFX90A-NEXT: v_mov_b32_e32 v3, v11
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v12, v[10:11], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2619,17 +2493,13 @@ define void @v_shuffle_v3i64_v3i64__5_0_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:11]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v12, v[4:5], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v10
-; GFX942-NEXT: v_mov_b32_e32 v3, v11
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v12, v[10:11], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -2643,51 +2513,46 @@ define void @v_shuffle_v3i64_v3i64__5_1_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_1_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:11]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v10
-; GFX900-NEXT: v_mov_b32_e32 v1, v11
-; GFX900-NEXT: global_store_dwordx2 v12, v[4:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v10, v[8:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_1_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:11]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v10
-; GFX90A-NEXT: v_mov_b32_e32 v1, v11
-; GFX90A-NEXT: global_store_dwordx2 v12, v[4:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v10, v[8:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_1_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:11]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v12, v[4:5], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v0, v10
-; GFX942-NEXT: v_mov_b32_e32 v1, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v10, v[8:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -2701,57 +2566,52 @@ define void @v_shuffle_v3i64_v3i64__5_3_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_3_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[2:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:11]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v10
-; GFX900-NEXT: v_mov_b32_e32 v1, v11
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: global_store_dwordx2 v12, v[4:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v8, v[6:7], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: v_mov_b32_e32 v7, v1
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_3_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[2:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:11]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v10
-; GFX90A-NEXT: v_mov_b32_e32 v1, v11
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: global_store_dwordx2 v12, v[4:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v8, v[6:7], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v6, v0
+; GFX90A-NEXT: v_mov_b32_e32 v7, v1
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_3_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:11]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v12, v[4:5], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v0, v10
-; GFX942-NEXT: v_mov_b32_e32 v1, v11
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v0
+; GFX942-NEXT: v_mov_b32_e32 v7, v1
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -2765,51 +2625,52 @@ define void @v_shuffle_v3i64_v3i64__5_4_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_4_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:11]
+; GFX900-NEXT: ; def v[2:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v10
-; GFX900-NEXT: v_mov_b32_e32 v7, v11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v12, v[4:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v8, v[6:7], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: v_mov_b32_e32 v7, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_4_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:11]
+; GFX90A-NEXT: ; def v[2:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v10
-; GFX90A-NEXT: v_mov_b32_e32 v7, v11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v12, v[4:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v8, v[6:7], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v6, v2
+; GFX90A-NEXT: v_mov_b32_e32 v7, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_4_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:11]
+; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v10
-; GFX942-NEXT: v_mov_b32_e32 v7, v11
+; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v12, v[4:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v2
+; GFX942-NEXT: v_mov_b32_e32 v7, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -2915,39 +2776,33 @@ define void @v_shuffle_v3i64_v3i64__2_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v3i64__2_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v3i64__2_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v3i64__2_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -3020,48 +2875,42 @@ define void @v_shuffle_v3i64_v3i64__5_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: v_mov_b32_e32 v7, v1
+; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v0
+; GFX90A-NEXT: v_mov_b32_e32 v7, v1
+; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v6, v0
+; GFX942-NEXT: v_mov_b32_e32 v7, v1
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -3080,9 +2929,7 @@ define void @v_shuffle_v3i64_v3i64__5_u_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3094,9 +2941,7 @@ define void @v_shuffle_v3i64_v3i64__5_u_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3108,9 +2953,7 @@ define void @v_shuffle_v3i64_v3i64__5_u_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -3124,57 +2967,45 @@ define void @v_shuffle_v3i64_v3i64__5_0_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_0_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:7]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v12, v[0:1], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_0_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:7]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v12, v[0:1], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_0_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:7]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v12, v[0:1], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -3189,16 +3020,14 @@ define void @v_shuffle_v3i64_v3i64__5_1_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:9]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v8
-; GFX900-NEXT: v_mov_b32_e32 v1, v9
-; GFX900-NEXT: global_store_dwordx2 v10, v[4:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v10, v[0:1], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3206,16 +3035,14 @@ define void @v_shuffle_v3i64_v3i64__5_1_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:9]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v8
-; GFX90A-NEXT: v_mov_b32_e32 v1, v9
-; GFX90A-NEXT: global_store_dwordx2 v10, v[4:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v10, v[0:1], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3223,16 +3050,14 @@ define void @v_shuffle_v3i64_v3i64__5_1_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:9]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v10, v[4:5], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v0, v8
-; GFX942-NEXT: v_mov_b32_e32 v1, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -3247,16 +3072,14 @@ define void @v_shuffle_v3i64_v3i64__5_2_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[2:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:11]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, v10
-; GFX900-NEXT: v_mov_b32_e32 v3, v11
-; GFX900-NEXT: global_store_dwordx2 v12, v[6:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3264,16 +3087,14 @@ define void @v_shuffle_v3i64_v3i64__5_2_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[2:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:11]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, v10
-; GFX90A-NEXT: v_mov_b32_e32 v3, v11
-; GFX90A-NEXT: global_store_dwordx2 v12, v[6:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3281,16 +3102,14 @@ define void @v_shuffle_v3i64_v3i64__5_2_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:11]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v12, v[6:7], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v10
-; GFX942-NEXT: v_mov_b32_e32 v3, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -3304,42 +3123,42 @@ define void @v_shuffle_v3i64_v3i64__5_4_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_4_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: v_mov_b32_e32 v7, v3
+; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_4_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v2
+; GFX90A-NEXT: v_mov_b32_e32 v7, v3
+; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_4_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v6, v2
+; GFX942-NEXT: v_mov_b32_e32 v7, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -3516,13 +3335,13 @@ define void @v_shuffle_v3i64_v3i64__2_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v8
+; GFX900-NEXT: v_mov_b32_e32 v7, v9
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, v4
-; GFX900-NEXT: v_mov_b32_e32 v7, v5
; GFX900-NEXT: global_store_dwordx2 v12, v[8:9], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3533,13 +3352,13 @@ define void @v_shuffle_v3i64_v3i64__2_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, v8
+; GFX90A-NEXT: v_mov_b32_e32 v7, v9
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v5
; GFX90A-NEXT: global_store_dwordx2 v12, v[8:9], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3550,13 +3369,13 @@ define void @v_shuffle_v3i64_v3i64__2_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v8
+; GFX942-NEXT: v_mov_b32_e32 v7, v9
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: global_store_dwordx2 v12, v[8:9], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v5
-; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -3662,42 +3481,42 @@ define void @v_shuffle_v3i64_v3i64__5_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx2 v6, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: v_mov_b32_e32 v7, v3
+; GFX900-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx2 v6, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v2
+; GFX90A-NEXT: v_mov_b32_e32 v7, v3
+; GFX90A-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v6, v2
+; GFX942-NEXT: v_mov_b32_e32 v7, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -3711,42 +3530,36 @@ define void @v_shuffle_v3i64_v3i64__5_u_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_u_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
; GFX900-NEXT: global_store_dwordx2 v6, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_u_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
; GFX90A-NEXT: global_store_dwordx2 v6, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_u_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
; GFX942-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -3760,57 +3573,45 @@ define void @v_shuffle_v3i64_v3i64__5_0_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_0_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:7]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v12, v[2:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_0_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:7]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v12, v[2:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_0_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:7]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -3825,16 +3626,14 @@ define void @v_shuffle_v3i64_v3i64__5_1_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:9]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v8
-; GFX900-NEXT: v_mov_b32_e32 v1, v9
-; GFX900-NEXT: global_store_dwordx2 v10, v[6:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3842,16 +3641,14 @@ define void @v_shuffle_v3i64_v3i64__5_1_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:9]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v8
-; GFX90A-NEXT: v_mov_b32_e32 v1, v9
-; GFX90A-NEXT: global_store_dwordx2 v10, v[6:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3859,16 +3656,14 @@ define void @v_shuffle_v3i64_v3i64__5_1_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:9]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v10, v[6:7], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v0, v8
-; GFX942-NEXT: v_mov_b32_e32 v1, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -3883,16 +3678,14 @@ define void @v_shuffle_v3i64_v3i64__5_2_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[2:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:11]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, v10
-; GFX900-NEXT: v_mov_b32_e32 v3, v11
-; GFX900-NEXT: global_store_dwordx2 v12, v[8:9], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3900,16 +3693,14 @@ define void @v_shuffle_v3i64_v3i64__5_2_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[2:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:11]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, v10
-; GFX90A-NEXT: v_mov_b32_e32 v3, v11
-; GFX90A-NEXT: global_store_dwordx2 v12, v[8:9], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3917,16 +3708,14 @@ define void @v_shuffle_v3i64_v3i64__5_2_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:11]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v12, v[8:9], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v10
-; GFX942-NEXT: v_mov_b32_e32 v3, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -3940,48 +3729,42 @@ define void @v_shuffle_v3i64_v3i64__5_3_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_3_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v6, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: v_mov_b32_e32 v7, v1
+; GFX900-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_3_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v6, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v0
+; GFX90A-NEXT: v_mov_b32_e32 v7, v1
+; GFX90A-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_3_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v6, v0
+; GFX942-NEXT: v_mov_b32_e32 v7, v1
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -4158,13 +3941,13 @@ define void @v_shuffle_v3i64_v3i64__2_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v10
+; GFX900-NEXT: v_mov_b32_e32 v7, v11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, v4
-; GFX900-NEXT: v_mov_b32_e32 v9, v5
; GFX900-NEXT: global_store_dwordx2 v12, v[10:11], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4175,13 +3958,13 @@ define void @v_shuffle_v3i64_v3i64__2_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, v10
+; GFX90A-NEXT: v_mov_b32_e32 v7, v11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, v4
-; GFX90A-NEXT: v_mov_b32_e32 v9, v5
; GFX90A-NEXT: global_store_dwordx2 v12, v[10:11], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4192,13 +3975,13 @@ define void @v_shuffle_v3i64_v3i64__2_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v10
+; GFX942-NEXT: v_mov_b32_e32 v7, v11
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: global_store_dwordx2 v12, v[10:11], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v8, v4
-; GFX942-NEXT: v_mov_b32_e32 v9, v5
-; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -4304,42 +4087,36 @@ define void @v_shuffle_v3i64_v3i64__5_u_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_u_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
; GFX900-NEXT: global_store_dwordx2 v6, v[4:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_u_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
; GFX90A-NEXT: global_store_dwordx2 v6, v[4:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_u_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -4353,57 +4130,45 @@ define void @v_shuffle_v3i64_v3i64__5_0_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_0_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:7]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx2 v8, v[6:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v12, v[4:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_0_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:7]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx2 v8, v[6:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v12, v[4:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_0_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:7]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v12, v[4:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -4418,16 +4183,14 @@ define void @v_shuffle_v3i64_v3i64__5_1_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:9]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v8
-; GFX900-NEXT: v_mov_b32_e32 v1, v9
-; GFX900-NEXT: global_store_dwordx2 v10, v[8:9], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v10, v[4:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4435,16 +4198,14 @@ define void @v_shuffle_v3i64_v3i64__5_1_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:9]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v8
-; GFX90A-NEXT: v_mov_b32_e32 v1, v9
-; GFX90A-NEXT: global_store_dwordx2 v10, v[8:9], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v10, v[4:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4452,16 +4213,14 @@ define void @v_shuffle_v3i64_v3i64__5_1_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:9]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v10, v[8:9], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v0, v8
-; GFX942-NEXT: v_mov_b32_e32 v1, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v10, v[4:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -4476,16 +4235,14 @@ define void @v_shuffle_v3i64_v3i64__5_2_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[2:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:11]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, v10
-; GFX900-NEXT: v_mov_b32_e32 v3, v11
-; GFX900-NEXT: global_store_dwordx2 v12, v[10:11], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4493,16 +4250,14 @@ define void @v_shuffle_v3i64_v3i64__5_2_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[2:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:11]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, v10
-; GFX90A-NEXT: v_mov_b32_e32 v3, v11
-; GFX90A-NEXT: global_store_dwordx2 v12, v[10:11], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4510,16 +4265,14 @@ define void @v_shuffle_v3i64_v3i64__5_2_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:11]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v12, v[10:11], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v10
-; GFX942-NEXT: v_mov_b32_e32 v3, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -4533,48 +4286,42 @@ define void @v_shuffle_v3i64_v3i64__5_3_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_3_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v6, v[4:5], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: v_mov_b32_e32 v7, v1
+; GFX900-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_3_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v6, v[4:5], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v0
+; GFX90A-NEXT: v_mov_b32_e32 v7, v1
+; GFX90A-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_3_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v6, v0
+; GFX942-NEXT: v_mov_b32_e32 v7, v1
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -4588,42 +4335,42 @@ define void @v_shuffle_v3i64_v3i64__5_4_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v3i64__5_4_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx2 v6, v[4:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: v_mov_b32_e32 v7, v3
+; GFX900-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v3i64__5_4_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx2 v6, v[4:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v2
+; GFX90A-NEXT: v_mov_b32_e32 v7, v3
+; GFX90A-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_4_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v6, v2
+; GFX942-NEXT: v_mov_b32_e32 v7, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -4759,10 +4506,9 @@ define void @s_shuffle_v3i64_v3i64__2_u_u() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -4860,10 +4606,9 @@ define void @s_shuffle_v3i64_v3i64__5_u_u() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -4880,15 +4625,13 @@ define void @s_shuffle_v3i64_v3i64__5_0_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -4898,15 +4641,13 @@ define void @s_shuffle_v3i64_v3i64__5_0_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -4969,11 +4710,11 @@ define void @s_shuffle_v3i64_v3i64__5_1_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -5024,13 +4765,11 @@ define void @s_shuffle_v3i64_v3i64__5_2_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s10, s12
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b32 s11, s13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -5043,46 +4782,18 @@ define void @s_shuffle_v3i64_v3i64__5_2_u() {
}
define void @s_shuffle_v3i64_v3i64__5_3_u() {
-; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_3_u:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_3_u:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_3_u:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v3i64_v3i64__5_3_u:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s4
+; GFX9-NEXT: s_mov_b32 s11, s5
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 3, i32 poison>
@@ -5095,10 +4806,10 @@ define void @s_shuffle_v3i64_v3i64__5_4_u() {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
+; GFX9-NEXT: ; def s[4:9]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
+; GFX9-NEXT: s_mov_b32 s10, s6
+; GFX9-NEXT: s_mov_b32 s11, s7
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:13]
; GFX9-NEXT: ;;#ASMEND
@@ -5111,50 +4822,18 @@ define void @s_shuffle_v3i64_v3i64__5_4_u() {
}
define void @s_shuffle_v3i64_v3i64__5_5_u() {
-; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_5_u:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_5_u:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_5_u:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v3i64_v3i64__5_5_u:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 5, i32 poison>
@@ -5163,65 +4842,21 @@ define void @s_shuffle_v3i64_v3i64__5_5_u() {
}
define void @s_shuffle_v3i64_v3i64__5_5_0() {
-; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_5_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_5_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_5_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v3i64_v3i64__5_5_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 5, i32 0>
@@ -5234,17 +4869,15 @@ define void @s_shuffle_v3i64_v3i64__5_5_1() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -5254,17 +4887,15 @@ define void @s_shuffle_v3i64_v3i64__5_5_1() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -5274,16 +4905,14 @@ define void @s_shuffle_v3i64_v3i64__5_5_1() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:9]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s10, s8
+; GFX942-NEXT: s_mov_b32 s11, s9
; GFX942-NEXT: s_mov_b32 s13, s3
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
@@ -5304,12 +4933,10 @@ define void @s_shuffle_v3i64_v3i64__5_5_2() {
; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:21]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s20
-; GFX900-NEXT: s_mov_b32 s9, s21
-; GFX900-NEXT: s_mov_b32 s10, s20
-; GFX900-NEXT: s_mov_b32 s11, s21
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -5322,12 +4949,10 @@ define void @s_shuffle_v3i64_v3i64__5_5_2() {
; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:21]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s20
-; GFX90A-NEXT: s_mov_b32 s9, s21
-; GFX90A-NEXT: s_mov_b32 s10, s20
-; GFX90A-NEXT: s_mov_b32 s11, s21
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -5339,13 +4964,12 @@ define void @s_shuffle_v3i64_v3i64__5_5_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b32 s10, s8
+; GFX942-NEXT: s_mov_b32 s11, s9
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -5358,52 +4982,20 @@ define void @s_shuffle_v3i64_v3i64__5_5_2() {
}
define void @s_shuffle_v3i64_v3i64__5_5_3() {
-; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_5_3:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_5_3:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_5_3:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v3i64_v3i64__5_5_3:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s12, s4
+; GFX9-NEXT: s_mov_b32 s13, s5
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 5, i32 3>
@@ -5412,74 +5004,38 @@ define void @s_shuffle_v3i64_v3i64__5_5_3() {
}
define void @s_shuffle_v3i64_v3i64__5_5_4() {
-; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_5_4:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_5_4:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_5_4:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 5, i32 4>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
+; GFX9-LABEL: s_shuffle_v3i64_v3i64__5_5_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s12, s6
+; GFX9-NEXT: s_mov_b32 s13, s7
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %vec1 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 5, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
define void @s_shuffle_v3i64_v3i64__5_5_5() {
; GFX9-LABEL: s_shuffle_v3i64_v3i64__5_5_5:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
+; GFX9-NEXT: ; def s[4:9]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s12, s8
+; GFX9-NEXT: s_mov_b32 s13, s9
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:13]
; GFX9-NEXT: ;;#ASMEND
@@ -5492,50 +5048,18 @@ define void @s_shuffle_v3i64_v3i64__5_5_5() {
}
define void @s_shuffle_v3i64_v3i64__u_0_0() {
-; GFX900-LABEL: s_shuffle_v3i64_v3i64__u_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3i64_v3i64__u_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3i64_v3i64__u_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v3i64_v3i64__u_0_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 poison, i32 0, i32 0>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
@@ -5564,56 +5088,20 @@ define void @s_shuffle_v3i64_v3i64__0_0_0() {
}
define void @s_shuffle_v3i64_v3i64__1_0_0() {
-; GFX900-LABEL: s_shuffle_v3i64_v3i64__1_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3i64_v3i64__1_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3i64_v3i64__1_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v3i64_v3i64__1_0_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s14
+; GFX9-NEXT: s_mov_b32 s9, s15
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 1, i32 0, i32 0>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
@@ -5621,52 +5109,20 @@ define void @s_shuffle_v3i64_v3i64__1_0_0() {
}
define void @s_shuffle_v3i64_v3i64__2_0_0() {
-; GFX900-LABEL: s_shuffle_v3i64_v3i64__2_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3i64_v3i64__2_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3i64_v3i64__2_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v3i64_v3i64__2_0_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s4
+; GFX9-NEXT: s_mov_b32 s11, s5
+; GFX9-NEXT: s_mov_b32 s12, s4
+; GFX9-NEXT: s_mov_b32 s13, s5
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 2, i32 0, i32 0>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
@@ -5674,50 +5130,18 @@ define void @s_shuffle_v3i64_v3i64__2_0_0() {
}
define void @s_shuffle_v3i64_v3i64__3_0_0() {
-; GFX900-LABEL: s_shuffle_v3i64_v3i64__3_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3i64_v3i64__3_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3i64_v3i64__3_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v3i64_v3i64__3_0_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 3, i32 0, i32 0>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
@@ -5732,14 +5156,12 @@ define void @s_shuffle_v3i64_v3i64__4_0_0() {
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -5752,14 +5174,12 @@ define void @s_shuffle_v3i64_v3i64__4_0_0() {
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -5769,17 +5189,15 @@ define void @s_shuffle_v3i64_v3i64__4_0_0() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[12:17]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:9]
+; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b32 s8, s2
+; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b32 s10, s12
+; GFX942-NEXT: s_mov_b32 s11, s13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -5792,63 +5210,21 @@ define void @s_shuffle_v3i64_v3i64__4_0_0() {
}
define void @s_shuffle_v3i64_v3i64__5_0_0() {
-; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:9]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v3i64_v3i64__5_0_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 0, i32 0>
@@ -5857,307 +5233,140 @@ define void @s_shuffle_v3i64_v3i64__5_0_0() {
}
define void @s_shuffle_v3i64_v3i64__5_u_0() {
-; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_u_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_u_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_u_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:9]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 poison, i32 0>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v3i64__5_1_0() {
-; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_1_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s6
-; GFX900-NEXT: s_mov_b32 s11, s7
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_1_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s6
-; GFX90A-NEXT: s_mov_b32 s11, s7
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_1_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:9]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 1, i32 0>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v3i64__5_2_0() {
-; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_2_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_2_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_2_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 2, i32 0>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v3i64__5_3_0() {
-; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_3_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_3_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_3_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:9]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 3, i32 0>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v3i64__5_4_0() {
-; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_4_0:
+; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_u_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_4_0:
+; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_u_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_4_0:
+; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_u_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ; def s[12:17]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %vec1 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 poison, i32 0>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v3i64__5_1_0() {
+; GFX9-LABEL: s_shuffle_v3i64_v3i64__5_1_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s14
+; GFX9-NEXT: s_mov_b32 s11, s15
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %vec1 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 1, i32 0>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v3i64__5_2_0() {
+; GFX9-LABEL: s_shuffle_v3i64_v3i64__5_2_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s16
+; GFX9-NEXT: s_mov_b32 s11, s17
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %vec1 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 2, i32 0>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v3i64__5_3_0() {
+; GFX9-LABEL: s_shuffle_v3i64_v3i64__5_3_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s4
+; GFX9-NEXT: s_mov_b32 s11, s5
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %vec1 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 3, i32 0>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v3i64__5_4_0() {
+; GFX9-LABEL: s_shuffle_v3i64_v3i64__5_4_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s6
+; GFX9-NEXT: s_mov_b32 s11, s7
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 4, i32 0>
@@ -6229,12 +5438,12 @@ define void @s_shuffle_v3i64_v3i64__2_1_1() {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
+; GFX9-NEXT: ; def s[4:9]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: s_mov_b32 s10, s6
+; GFX9-NEXT: s_mov_b32 s11, s7
+; GFX9-NEXT: s_mov_b32 s12, s6
+; GFX9-NEXT: s_mov_b32 s13, s7
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:13]
; GFX9-NEXT: ;;#ASMEND
@@ -6364,12 +5573,10 @@ define void @s_shuffle_v3i64_v3i64__5_1_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s12, s10
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s10
; GFX942-NEXT: s_mov_b32 s13, s11
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
@@ -6387,15 +5594,13 @@ define void @s_shuffle_v3i64_v3i64__5_u_1() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -6405,15 +5610,13 @@ define void @s_shuffle_v3i64_v3i64__5_u_1() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -6446,17 +5649,15 @@ define void @s_shuffle_v3i64_v3i64__5_0_1() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -6466,17 +5667,15 @@ define void @s_shuffle_v3i64_v3i64__5_0_1() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -6507,61 +5706,23 @@ define void @s_shuffle_v3i64_v3i64__5_0_1() {
}
define void @s_shuffle_v3i64_v3i64__5_2_1() {
-; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_2_1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_2_1:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_2_1:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v3i64_v3i64__5_2_1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s16
+; GFX9-NEXT: s_mov_b32 s11, s17
+; GFX9-NEXT: s_mov_b32 s12, s14
+; GFX9-NEXT: s_mov_b32 s13, s15
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 2, i32 1>
@@ -6574,17 +5735,15 @@ define void @s_shuffle_v3i64_v3i64__5_3_1() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -6594,17 +5753,15 @@ define void @s_shuffle_v3i64_v3i64__5_3_1() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -6639,15 +5796,15 @@ define void @s_shuffle_v3i64_v3i64__5_4_1() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s10, s6
+; GFX900-NEXT: s_mov_b32 s11, s7
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -6657,15 +5814,15 @@ define void @s_shuffle_v3i64_v3i64__5_4_1() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s10, s6
+; GFX90A-NEXT: s_mov_b32 s11, s7
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -6675,14 +5832,14 @@ define void @s_shuffle_v3i64_v3i64__5_4_1() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:9]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s10, s6
+; GFX942-NEXT: s_mov_b32 s11, s7
; GFX942-NEXT: s_mov_b32 s13, s3
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
@@ -6759,12 +5916,12 @@ define void @s_shuffle_v3i64_v3i64__2_2_2() {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
+; GFX9-NEXT: ; def s[4:9]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s12, s8
+; GFX9-NEXT: s_mov_b32 s13, s9
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:13]
; GFX9-NEXT: ;;#ASMEND
@@ -6894,12 +6051,10 @@ define void @s_shuffle_v3i64_v3i64__5_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s10, s12
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s12
; GFX942-NEXT: s_mov_b32 s11, s13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
@@ -6947,11 +6102,11 @@ define void @s_shuffle_v3i64_v3i64__5_u_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -6964,61 +6119,23 @@ define void @s_shuffle_v3i64_v3i64__5_u_2() {
}
define void @s_shuffle_v3i64_v3i64__5_0_2() {
-; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_0_2:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_0_2:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_0_2:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v3i64_v3i64__5_0_2:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: s_mov_b32 s12, s16
+; GFX9-NEXT: s_mov_b32 s13, s17
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 0, i32 2>
@@ -7061,11 +6178,11 @@ define void @s_shuffle_v3i64_v3i64__5_1_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7116,13 +6233,12 @@ define void @s_shuffle_v3i64_v3i64__5_3_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b32 s10, s4
+; GFX942-NEXT: s_mov_b32 s11, s5
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7139,15 +6255,13 @@ define void @s_shuffle_v3i64_v3i64__5_4_2() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
+; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
+; GFX900-NEXT: s_mov_b32 s10, s6
+; GFX900-NEXT: s_mov_b32 s11, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -7157,15 +6271,13 @@ define void @s_shuffle_v3i64_v3i64__5_4_2() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
+; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
+; GFX90A-NEXT: s_mov_b32 s10, s6
+; GFX90A-NEXT: s_mov_b32 s11, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -7177,13 +6289,12 @@ define void @s_shuffle_v3i64_v3i64__5_4_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b32 s10, s6
+; GFX942-NEXT: s_mov_b32 s11, s7
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7256,252 +6367,155 @@ define void @s_shuffle_v3i64_v3i64__1_3_3() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3i64_v3i64__1_3_3:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3i64_v3i64__1_3_3:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 1, i32 3, i32 3>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v3i64__2_3_3() {
-; GFX900-LABEL: s_shuffle_v3i64_v3i64__2_3_3:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3i64_v3i64__2_3_3:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3i64_v3i64__2_3_3:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 2, i32 3, i32 3>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v3i64__3_3_3() {
-; GFX9-LABEL: s_shuffle_v3i64_v3i64__3_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 3, i32 3, i32 3>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v3i64__4_3_3() {
-; GFX900-LABEL: s_shuffle_v3i64_v3i64__4_3_3:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3i64_v3i64__4_3_3:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3i64_v3i64__4_3_3:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 4, i32 3, i32 3>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v3i64__5_3_3() {
-; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_3_3:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_3_3:
+; GFX90A-LABEL: s_shuffle_v3i64_v3i64__1_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_3_3:
+; GFX942-LABEL: s_shuffle_v3i64_v3i64__1_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b32 s8, s2
+; GFX942-NEXT: s_mov_b32 s9, s3
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
- %vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 3, i32 3>
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 1, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v3i64__5_u_3() {
-; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_u_3:
+define void @s_shuffle_v3i64_v3i64__2_3_3() {
+; GFX900-LABEL: s_shuffle_v3i64_v3i64__2_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_u_3:
+; GFX90A-LABEL: s_shuffle_v3i64_v3i64__2_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_u_3:
+; GFX942-LABEL: s_shuffle_v3i64_v3i64__2_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 2, i32 3, i32 3>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v3i64__3_3_3() {
+; GFX9-LABEL: s_shuffle_v3i64_v3i64__3_3_3:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 3, i32 3, i32 3>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v3i64__4_3_3() {
+; GFX9-LABEL: s_shuffle_v3i64_v3i64__4_3_3:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s14
+; GFX9-NEXT: s_mov_b32 s9, s15
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %vec1 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 4, i32 3, i32 3>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v3i64__5_3_3() {
+; GFX9-LABEL: s_shuffle_v3i64_v3i64__5_3_3:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s4
+; GFX9-NEXT: s_mov_b32 s11, s5
+; GFX9-NEXT: s_mov_b32 s12, s4
+; GFX9-NEXT: s_mov_b32 s13, s5
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %vec1 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 3, i32 3>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v3i64__5_u_3() {
+; GFX9-LABEL: s_shuffle_v3i64_v3i64__5_u_3:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s12, s4
+; GFX9-NEXT: s_mov_b32 s13, s5
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 poison, i32 3>
@@ -7514,15 +6528,15 @@ define void @s_shuffle_v3i64_v3i64__5_0_3() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -7532,15 +6546,15 @@ define void @s_shuffle_v3i64_v3i64__5_0_3() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -7609,13 +6623,12 @@ define void @s_shuffle_v3i64_v3i64__5_1_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b32 s12, s4
+; GFX942-NEXT: s_mov_b32 s13, s5
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7668,15 +6681,15 @@ define void @s_shuffle_v3i64_v3i64__5_2_3() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s10, s12
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:17]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b32 s12, s4
+; GFX942-NEXT: s_mov_b32 s13, s5
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7689,52 +6702,20 @@ define void @s_shuffle_v3i64_v3i64__5_2_3() {
}
define void @s_shuffle_v3i64_v3i64__5_4_3() {
-; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_4_3:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s6
-; GFX900-NEXT: s_mov_b32 s11, s7
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_4_3:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s6
-; GFX90A-NEXT: s_mov_b32 s11, s7
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_4_3:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v3i64_v3i64__5_4_3:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s6
+; GFX9-NEXT: s_mov_b32 s11, s7
+; GFX9-NEXT: s_mov_b32 s12, s4
+; GFX9-NEXT: s_mov_b32 s13, s5
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 4, i32 3>
@@ -7892,12 +6873,12 @@ define void @s_shuffle_v3i64_v3i64__2_4_4() {
; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s12, s10
-; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -7910,12 +6891,12 @@ define void @s_shuffle_v3i64_v3i64__2_4_4() {
; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s12, s10
-; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -7925,15 +6906,16 @@ define void @s_shuffle_v3i64_v3i64__2_4_4() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b32 s10, s2
+; GFX942-NEXT: s_mov_b32 s11, s3
+; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: s_mov_b32 s13, s3
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7986,70 +6968,42 @@ define void @s_shuffle_v3i64_v3i64__4_4_4() {
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-
-define void @s_shuffle_v3i64_v3i64__5_4_4() {
-; GFX9-LABEL: s_shuffle_v3i64_v3i64__5_4_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 4, i32 4>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v3i64__5_u_4() {
-; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_u_4:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_u_4:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_u_4:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+
+define void @s_shuffle_v3i64_v3i64__5_4_4() {
+; GFX9-LABEL: s_shuffle_v3i64_v3i64__5_4_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s6
+; GFX9-NEXT: s_mov_b32 s11, s7
+; GFX9-NEXT: s_mov_b32 s12, s6
+; GFX9-NEXT: s_mov_b32 s13, s7
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %vec1 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v3i64__5_u_4() {
+; GFX9-LABEL: s_shuffle_v3i64_v3i64__5_u_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s12, s6
+; GFX9-NEXT: s_mov_b32 s13, s7
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 poison, i32 4>
@@ -8062,17 +7016,15 @@ define void @s_shuffle_v3i64_v3i64__5_0_4() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -8082,17 +7034,15 @@ define void @s_shuffle_v3i64_v3i64__5_0_4() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -8161,13 +7111,12 @@ define void @s_shuffle_v3i64_v3i64__5_1_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b32 s12, s6
+; GFX942-NEXT: s_mov_b32 s13, s7
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8220,17 +7169,15 @@ define void @s_shuffle_v3i64_v3i64__5_2_4() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:17]
+; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s10, s12
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b32 s12, s6
+; GFX942-NEXT: s_mov_b32 s13, s7
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8243,52 +7190,20 @@ define void @s_shuffle_v3i64_v3i64__5_2_4() {
}
define void @s_shuffle_v3i64_v3i64__5_3_4() {
-; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_3_4:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_3_4:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_3_4:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v3i64_v3i64__5_3_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s4
+; GFX9-NEXT: s_mov_b32 s11, s5
+; GFX9-NEXT: s_mov_b32 s12, s6
+; GFX9-NEXT: s_mov_b32 s13, s7
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 3, i32 4>
@@ -8446,12 +7361,12 @@ define void @s_shuffle_v3i64_v3i64__2_5_5() {
; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -8464,12 +7379,12 @@ define void @s_shuffle_v3i64_v3i64__2_5_5() {
; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -8479,15 +7394,16 @@ define void @s_shuffle_v3i64_v3i64__2_5_5() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b32 s10, s4
+; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b32 s12, s4
+; GFX942-NEXT: s_mov_b32 s13, s5
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8546,10 +7462,10 @@ define void @s_shuffle_v3i64_v3i64__5_u_5() {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
+; GFX9-NEXT: ; def s[4:9]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
+; GFX9-NEXT: s_mov_b32 s12, s8
+; GFX9-NEXT: s_mov_b32 s13, s9
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:13]
; GFX9-NEXT: ;;#ASMEND
@@ -8566,15 +7482,15 @@ define void @s_shuffle_v3i64_v3i64__5_0_5() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s8
+; GFX900-NEXT: s_mov_b32 s13, s9
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -8584,15 +7500,15 @@ define void @s_shuffle_v3i64_v3i64__5_0_5() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s8
+; GFX90A-NEXT: s_mov_b32 s13, s9
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -8602,15 +7518,15 @@ define void @s_shuffle_v3i64_v3i64__5_0_5() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:9]
+; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b32 s12, s8
+; GFX942-NEXT: s_mov_b32 s13, s9
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8630,12 +7546,10 @@ define void @s_shuffle_v3i64_v3i64__5_1_5() {
; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
+; GFX900-NEXT: s_mov_b32 s12, s8
+; GFX900-NEXT: s_mov_b32 s13, s9
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -8648,12 +7562,10 @@ define void @s_shuffle_v3i64_v3i64__5_1_5() {
; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
+; GFX90A-NEXT: s_mov_b32 s12, s8
+; GFX90A-NEXT: s_mov_b32 s13, s9
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -8665,13 +7577,12 @@ define void @s_shuffle_v3i64_v3i64__5_1_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b32 s12, s8
+; GFX942-NEXT: s_mov_b32 s13, s9
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8688,15 +7599,15 @@ define void @s_shuffle_v3i64_v3i64__5_2_5() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
+; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s8
+; GFX900-NEXT: s_mov_b32 s13, s9
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -8706,15 +7617,15 @@ define void @s_shuffle_v3i64_v3i64__5_2_5() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
+; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s8
+; GFX90A-NEXT: s_mov_b32 s13, s9
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -8726,13 +7637,13 @@ define void @s_shuffle_v3i64_v3i64__5_2_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s10, s12
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b32 s12, s8
+; GFX942-NEXT: s_mov_b32 s13, s9
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8745,56 +7656,20 @@ define void @s_shuffle_v3i64_v3i64__5_2_5() {
}
define void @s_shuffle_v3i64_v3i64__5_3_5() {
-; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_3_5:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_3_5:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_3_5:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v3i64_v3i64__5_3_5:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s4
+; GFX9-NEXT: s_mov_b32 s11, s5
+; GFX9-NEXT: s_mov_b32 s12, s8
+; GFX9-NEXT: s_mov_b32 s13, s9
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 3, i32 5>
@@ -8807,10 +7682,12 @@ define void @s_shuffle_v3i64_v3i64__5_4_5() {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
+; GFX9-NEXT: ; def s[4:9]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
+; GFX9-NEXT: s_mov_b32 s10, s6
+; GFX9-NEXT: s_mov_b32 s11, s7
+; GFX9-NEXT: s_mov_b32 s12, s8
+; GFX9-NEXT: s_mov_b32 s13, s9
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:13]
; GFX9-NEXT: ;;#ASMEND
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v4i64.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v4i64.ll
index 6e156d2d4a2f5..58a2146c00de9 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v4i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v4i64.ll
@@ -139,39 +139,33 @@ define void @v_shuffle_v3i64_v4i64__3_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v4i64__3_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v4i64__3_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v4i64__3_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -275,39 +269,33 @@ define void @v_shuffle_v3i64_v4i64__7_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v4i64__7_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v4i64__7_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -321,55 +309,42 @@ define void @v_shuffle_v3i64_v4i64__7_0_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v4i64__7_0_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v8, v6
-; GFX900-NEXT: v_mov_b32_e32 v9, v7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, v0
-; GFX900-NEXT: v_mov_b32_e32 v11, v1
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v4i64__7_0_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:9]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v8
-; GFX90A-NEXT: v_mov_b32_e32 v3, v9
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_0_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:9]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v8
-; GFX942-NEXT: v_mov_b32_e32 v3, v9
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -383,49 +358,43 @@ define void @v_shuffle_v3i64_v4i64__7_1_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v4i64__7_1_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v10
-; GFX900-NEXT: v_mov_b32_e32 v1, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v4i64__7_1_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v10
-; GFX90A-NEXT: v_mov_b32_e32 v1, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_1_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v10
-; GFX942-NEXT: v_mov_b32_e32 v1, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -439,49 +408,43 @@ define void @v_shuffle_v3i64_v4i64__7_2_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v4i64__7_2_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, v12
-; GFX900-NEXT: v_mov_b32_e32 v3, v13
-; GFX900-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v4i64__7_2_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, v12
-; GFX90A-NEXT: v_mov_b32_e32 v3, v13
-; GFX90A-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_2_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v12
-; GFX942-NEXT: v_mov_b32_e32 v3, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -495,49 +458,43 @@ define void @v_shuffle_v3i64_v4i64__7_3_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v4i64__7_3_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v14
-; GFX900-NEXT: v_mov_b32_e32 v5, v15
-; GFX900-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v4i64__7_3_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v14
-; GFX90A-NEXT: v_mov_b32_e32 v5, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_3_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v14
-; GFX942-NEXT: v_mov_b32_e32 v5, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -551,45 +508,40 @@ define void @v_shuffle_v3i64_v4i64__7_4_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v4i64__7_4_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v0
+; GFX900-NEXT: v_mov_b32_e32 v9, v1
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v4i64__7_4_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v0
+; GFX90A-NEXT: v_mov_b32_e32 v9, v1
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_4_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v0
+; GFX942-NEXT: v_mov_b32_e32 v9, v1
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -603,39 +555,40 @@ define void @v_shuffle_v3i64_v4i64__7_5_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v4i64__7_5_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v2
+; GFX900-NEXT: v_mov_b32_e32 v9, v3
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v4i64__7_5_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v2
+; GFX90A-NEXT: v_mov_b32_e32 v9, v3
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_5_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v2
+; GFX942-NEXT: v_mov_b32_e32 v9, v3
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -649,39 +602,40 @@ define void @v_shuffle_v3i64_v4i64__7_6_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v4i64__7_6_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v4
+; GFX900-NEXT: v_mov_b32_e32 v9, v5
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v4i64__7_6_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v4
+; GFX90A-NEXT: v_mov_b32_e32 v9, v5
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_6_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v4
+; GFX942-NEXT: v_mov_b32_e32 v9, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -695,39 +649,40 @@ define void @v_shuffle_v3i64_v4i64__7_7_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v4i64__7_7_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v4i64__7_7_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_7_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -741,51 +696,51 @@ define void @v_shuffle_v3i64_v4i64__7_7_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v4i64__7_7_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:9]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v8
-; GFX900-NEXT: v_mov_b32_e32 v7, v9
-; GFX900-NEXT: global_store_dwordx2 v10, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v16, v[8:9], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v4i64__7_7_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:9]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v8
-; GFX90A-NEXT: v_mov_b32_e32 v7, v9
-; GFX90A-NEXT: global_store_dwordx2 v10, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v16, v[8:9], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_7_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:9]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v6, v8
-; GFX942-NEXT: v_mov_b32_e32 v7, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v16, v[8:9], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -799,51 +754,52 @@ define void @v_shuffle_v3i64_v4i64__7_7_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v4i64__7_7_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v8, v10
-; GFX900-NEXT: v_mov_b32_e32 v9, v11
-; GFX900-NEXT: global_store_dwordx2 v12, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v14, v[8:9], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v4i64__7_7_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v10
-; GFX90A-NEXT: v_mov_b32_e32 v9, v11
-; GFX90A-NEXT: global_store_dwordx2 v12, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v14, v[8:9], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_7_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: global_store_dwordx2 v14, v[8:9], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v8, v10
-; GFX942-NEXT: v_mov_b32_e32 v9, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -857,51 +813,52 @@ define void @v_shuffle_v3i64_v4i64__7_7_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v4i64__7_7_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
-; GFX900-NEXT: v_mov_b32_e32 v10, v12
-; GFX900-NEXT: v_mov_b32_e32 v11, v13
-; GFX900-NEXT: global_store_dwordx2 v14, v[4:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v12, v[8:9], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v4i64__7_7_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
-; GFX90A-NEXT: v_mov_b32_e32 v10, v12
-; GFX90A-NEXT: v_mov_b32_e32 v11, v13
-; GFX90A-NEXT: global_store_dwordx2 v14, v[4:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v12, v[8:9], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_7_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: global_store_dwordx2 v12, v[8:9], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v10, v12
-; GFX942-NEXT: v_mov_b32_e32 v11, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -915,51 +872,52 @@ define void @v_shuffle_v3i64_v4i64__7_7_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v4i64__7_7_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
-; GFX900-NEXT: v_mov_b32_e32 v12, v14
-; GFX900-NEXT: v_mov_b32_e32 v13, v15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v16, v[6:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v10, v[8:9], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v4i64__7_7_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v12, v14
-; GFX90A-NEXT: v_mov_b32_e32 v13, v15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v16, v[6:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v10, v[8:9], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_7_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v14
-; GFX942-NEXT: v_mov_b32_e32 v13, v15
+; GFX942-NEXT: global_store_dwordx2 v10, v[8:9], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -973,42 +931,42 @@ define void @v_shuffle_v3i64_v4i64__7_7_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v4i64__7_7_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx2 v10, v[0:1], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v4i64__7_7_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx2 v10, v[0:1], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_7_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -1022,42 +980,42 @@ define void @v_shuffle_v3i64_v4i64__7_7_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v4i64__7_7_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v4i64__7_7_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_7_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -1071,42 +1029,42 @@ define void @v_shuffle_v3i64_v4i64__7_7_6(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v4i64__7_7_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx2 v10, v[4:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v4i64__7_7_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx2 v10, v[4:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_7_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v10, v[4:5], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -1120,42 +1078,42 @@ define void @v_shuffle_v3i64_v4i64__7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v4i64__7_7_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[6:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx2 v10, v[6:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v4i64__7_7_7:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx2 v8, v[6:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx2 v10, v[6:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_7_7:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v10, v[6:7], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -1169,42 +1127,36 @@ define void @v_shuffle_v3i64_v4i64__u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v4i64__u_0_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v0, v[2:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v4i64__u_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v0, v[2:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v4i64__u_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v0, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -1361,48 +1313,42 @@ define void @v_shuffle_v3i64_v4i64__3_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v4i64__3_0_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v0
+; GFX900-NEXT: v_mov_b32_e32 v9, v1
+; GFX900-NEXT: global_store_dwordx2 v10, v[0:1], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v4i64__3_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v0
+; GFX90A-NEXT: v_mov_b32_e32 v9, v1
+; GFX90A-NEXT: global_store_dwordx2 v10, v[0:1], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v4i64__3_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v0
+; GFX942-NEXT: v_mov_b32_e32 v9, v1
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -1415,42 +1361,36 @@ define void @v_shuffle_v3i64_v4i64__4_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v4i64__4_0_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v0, v[2:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v4i64__4_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v0, v[2:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v4i64__4_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v0, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -1579,57 +1519,45 @@ define void @v_shuffle_v3i64_v4i64__7_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v4i64__7_0_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v8, v6
-; GFX900-NEXT: v_mov_b32_e32 v9, v7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, v0
-; GFX900-NEXT: v_mov_b32_e32 v11, v1
-; GFX900-NEXT: global_store_dwordx2 v12, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v16, v[8:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v4i64__7_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:9]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v8
-; GFX90A-NEXT: v_mov_b32_e32 v3, v9
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx2 v10, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v16, v[8:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:9]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v8
-; GFX942-NEXT: v_mov_b32_e32 v3, v9
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v16, v[8:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -1643,52 +1571,45 @@ define void @v_shuffle_v3i64_v4i64__7_u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v4i64__7_u_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:9]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v10, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v0, v8
-; GFX900-NEXT: v_mov_b32_e32 v1, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v16, v[8:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v4i64__7_u_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:9]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v10, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v0, v8
-; GFX90A-NEXT: v_mov_b32_e32 v1, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v16, v[8:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_u_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:9]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v8
-; GFX942-NEXT: v_mov_b32_e32 v1, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v16, v[8:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -1702,52 +1623,51 @@ define void @v_shuffle_v3i64_v4i64__7_1_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v4i64__7_1_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx2 v16, v[8:9], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v8, v10
+; GFX900-NEXT: v_mov_b32_e32 v9, v11
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v12, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v0, v10
-; GFX900-NEXT: v_mov_b32_e32 v1, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v4i64__7_1_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: global_store_dwordx2 v16, v[8:9], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v8, v10
+; GFX90A-NEXT: v_mov_b32_e32 v9, v11
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v12, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v0, v10
-; GFX90A-NEXT: v_mov_b32_e32 v1, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_1_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v12, v[0:1], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx2 v16, v[8:9], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v10
+; GFX942-NEXT: v_mov_b32_e32 v9, v11
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v10
-; GFX942-NEXT: v_mov_b32_e32 v1, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -1761,51 +1681,51 @@ define void @v_shuffle_v3i64_v4i64__7_2_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v4i64__7_2_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
+; GFX900-NEXT: global_store_dwordx2 v16, v[8:9], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v8, v12
+; GFX900-NEXT: v_mov_b32_e32 v9, v13
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, v12
-; GFX900-NEXT: v_mov_b32_e32 v3, v13
-; GFX900-NEXT: global_store_dwordx2 v14, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v4i64__7_2_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
+; GFX90A-NEXT: global_store_dwordx2 v16, v[8:9], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v8, v12
+; GFX90A-NEXT: v_mov_b32_e32 v9, v13
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, v12
-; GFX90A-NEXT: v_mov_b32_e32 v3, v13
-; GFX90A-NEXT: global_store_dwordx2 v14, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_2_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: global_store_dwordx2 v16, v[8:9], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v12
+; GFX942-NEXT: v_mov_b32_e32 v9, v13
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v14, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v12
-; GFX942-NEXT: v_mov_b32_e32 v3, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -1819,51 +1739,51 @@ define void @v_shuffle_v3i64_v4i64__7_3_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v4i64__7_3_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
-; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v14
-; GFX900-NEXT: v_mov_b32_e32 v5, v15
-; GFX900-NEXT: global_store_dwordx2 v16, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v16, v[8:9], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v8, v14
+; GFX900-NEXT: v_mov_b32_e32 v9, v15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v4i64__7_3_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
-; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v14
-; GFX90A-NEXT: v_mov_b32_e32 v5, v15
-; GFX90A-NEXT: global_store_dwordx2 v16, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v16, v[8:9], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v8, v14
+; GFX90A-NEXT: v_mov_b32_e32 v9, v15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_3_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
-; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v16, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v4, v14
-; GFX942-NEXT: v_mov_b32_e32 v5, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v16, v[8:9], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v14
+; GFX942-NEXT: v_mov_b32_e32 v9, v15
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -1877,52 +1797,51 @@ define void @v_shuffle_v3i64_v4i64__7_4_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v4i64__7_4_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:9]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v10, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v0, v8
-; GFX900-NEXT: v_mov_b32_e32 v1, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v16, v[8:9], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v8, v0
+; GFX900-NEXT: v_mov_b32_e32 v9, v1
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v4i64__7_4_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:9]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v10, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v0, v8
-; GFX90A-NEXT: v_mov_b32_e32 v1, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v16, v[8:9], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v8, v0
+; GFX90A-NEXT: v_mov_b32_e32 v9, v1
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_4_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:9]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v8
-; GFX942-NEXT: v_mov_b32_e32 v1, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v16, v[8:9], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v0
+; GFX942-NEXT: v_mov_b32_e32 v9, v1
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -1936,51 +1855,51 @@ define void @v_shuffle_v3i64_v4i64__7_5_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v4i64__7_5_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:9]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v8
-; GFX900-NEXT: v_mov_b32_e32 v3, v9
-; GFX900-NEXT: global_store_dwordx2 v10, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v16, v[8:9], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v8, v2
+; GFX900-NEXT: v_mov_b32_e32 v9, v3
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v4i64__7_5_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:9]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v8
-; GFX90A-NEXT: v_mov_b32_e32 v3, v9
-; GFX90A-NEXT: global_store_dwordx2 v10, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v16, v[8:9], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v8, v2
+; GFX90A-NEXT: v_mov_b32_e32 v9, v3
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_5_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:9]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v8
-; GFX942-NEXT: v_mov_b32_e32 v3, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v16, v[8:9], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v2
+; GFX942-NEXT: v_mov_b32_e32 v9, v3
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -1994,51 +1913,51 @@ define void @v_shuffle_v3i64_v4i64__7_6_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v4i64__7_6_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:9]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v8
-; GFX900-NEXT: v_mov_b32_e32 v5, v9
-; GFX900-NEXT: global_store_dwordx2 v10, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v16, v[8:9], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v8, v4
+; GFX900-NEXT: v_mov_b32_e32 v9, v5
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v4i64__7_6_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:9]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v8
-; GFX90A-NEXT: v_mov_b32_e32 v5, v9
-; GFX90A-NEXT: global_store_dwordx2 v10, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v16, v[8:9], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v8, v4
+; GFX90A-NEXT: v_mov_b32_e32 v9, v5
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_6_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:9]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v4, v8
-; GFX942-NEXT: v_mov_b32_e32 v5, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v16, v[8:9], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v4
+; GFX942-NEXT: v_mov_b32_e32 v9, v5
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -2232,42 +2151,42 @@ define void @v_shuffle_v3i64_v4i64__3_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v4i64__3_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v2
+; GFX900-NEXT: v_mov_b32_e32 v9, v3
+; GFX900-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v4i64__3_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v2
+; GFX90A-NEXT: v_mov_b32_e32 v9, v3
+; GFX90A-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v4i64__3_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v2
+; GFX942-NEXT: v_mov_b32_e32 v9, v3
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -2439,16 +2358,14 @@ define void @v_shuffle_v3i64_v4i64__7_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v10
-; GFX900-NEXT: v_mov_b32_e32 v1, v11
-; GFX900-NEXT: global_store_dwordx2 v12, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v14, v[8:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2456,16 +2373,14 @@ define void @v_shuffle_v3i64_v4i64__7_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v10
-; GFX90A-NEXT: v_mov_b32_e32 v1, v11
-; GFX90A-NEXT: global_store_dwordx2 v12, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v14, v[8:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2473,16 +2388,14 @@ define void @v_shuffle_v3i64_v4i64__7_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v0, v10
-; GFX942-NEXT: v_mov_b32_e32 v1, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v14, v[8:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -2497,16 +2410,14 @@ define void @v_shuffle_v3i64_v4i64__7_u_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v10
-; GFX900-NEXT: v_mov_b32_e32 v1, v11
-; GFX900-NEXT: global_store_dwordx2 v12, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v14, v[8:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2514,16 +2425,14 @@ define void @v_shuffle_v3i64_v4i64__7_u_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v10
-; GFX90A-NEXT: v_mov_b32_e32 v1, v11
-; GFX90A-NEXT: global_store_dwordx2 v12, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v14, v[8:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2531,16 +2440,14 @@ define void @v_shuffle_v3i64_v4i64__7_u_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v0, v10
-; GFX942-NEXT: v_mov_b32_e32 v1, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v14, v[8:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -2554,57 +2461,45 @@ define void @v_shuffle_v3i64_v4i64__7_0_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v4i64__7_0_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v8, v6
-; GFX900-NEXT: v_mov_b32_e32 v9, v7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, v0
-; GFX900-NEXT: v_mov_b32_e32 v11, v1
-; GFX900-NEXT: global_store_dwordx2 v12, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v16, v[10:11], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v4i64__7_0_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v12, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v2, v10
-; GFX90A-NEXT: v_mov_b32_e32 v3, v11
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v16, v[10:11], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_0_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v10
-; GFX942-NEXT: v_mov_b32_e32 v3, v11
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v16, v[10:11], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -2620,15 +2515,15 @@ define void @v_shuffle_v3i64_v4i64__7_2_1(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx2 v14, v[8:9], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v8, v10
+; GFX900-NEXT: v_mov_b32_e32 v9, v11
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v14, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v2, v12
-; GFX900-NEXT: v_mov_b32_e32 v3, v13
-; GFX900-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2637,15 +2532,15 @@ define void @v_shuffle_v3i64_v4i64__7_2_1(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: global_store_dwordx2 v14, v[8:9], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v8, v10
+; GFX90A-NEXT: v_mov_b32_e32 v9, v11
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v14, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v2, v12
-; GFX90A-NEXT: v_mov_b32_e32 v3, v13
-; GFX90A-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2654,16 +2549,15 @@ define void @v_shuffle_v3i64_v4i64__7_2_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v14, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx2 v14, v[8:9], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v10
+; GFX942-NEXT: v_mov_b32_e32 v9, v11
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v12
-; GFX942-NEXT: v_mov_b32_e32 v3, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -2677,51 +2571,51 @@ define void @v_shuffle_v3i64_v4i64__7_3_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v4i64__7_3_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: global_store_dwordx2 v14, v[8:9], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v8, v12
+; GFX900-NEXT: v_mov_b32_e32 v9, v13
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v14
-; GFX900-NEXT: v_mov_b32_e32 v5, v15
-; GFX900-NEXT: global_store_dwordx2 v16, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v4i64__7_3_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
+; GFX90A-NEXT: global_store_dwordx2 v14, v[8:9], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v8, v12
+; GFX90A-NEXT: v_mov_b32_e32 v9, v13
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v14
-; GFX90A-NEXT: v_mov_b32_e32 v5, v15
-; GFX90A-NEXT: global_store_dwordx2 v16, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_3_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: global_store_dwordx2 v14, v[8:9], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v12
+; GFX942-NEXT: v_mov_b32_e32 v9, v13
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v16, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v4, v14
-; GFX942-NEXT: v_mov_b32_e32 v5, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -2735,58 +2629,52 @@ define void @v_shuffle_v3i64_v4i64__7_4_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v4i64__7_4_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v12, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v0, v10
-; GFX900-NEXT: v_mov_b32_e32 v1, v11
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v14, v[8:9], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v8, v0
+; GFX900-NEXT: v_mov_b32_e32 v9, v1
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v4i64__7_4_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v12, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v0, v10
-; GFX90A-NEXT: v_mov_b32_e32 v1, v11
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v14, v[8:9], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v8, v0
+; GFX90A-NEXT: v_mov_b32_e32 v9, v1
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_4_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx2 v14, v[8:9], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v10
-; GFX942-NEXT: v_mov_b32_e32 v1, v11
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v0
+; GFX942-NEXT: v_mov_b32_e32 v9, v1
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -2800,51 +2688,52 @@ define void @v_shuffle_v3i64_v4i64__7_5_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v4i64__7_5_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v10
-; GFX900-NEXT: v_mov_b32_e32 v5, v11
-; GFX900-NEXT: global_store_dwordx2 v12, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v14, v[8:9], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v8, v2
+; GFX900-NEXT: v_mov_b32_e32 v9, v3
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v4i64__7_5_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v10
-; GFX90A-NEXT: v_mov_b32_e32 v5, v11
-; GFX90A-NEXT: global_store_dwordx2 v12, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v14, v[8:9], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v8, v2
+; GFX90A-NEXT: v_mov_b32_e32 v9, v3
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_5_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: global_store_dwordx2 v14, v[8:9], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v4, v10
-; GFX942-NEXT: v_mov_b32_e32 v5, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v2
+; GFX942-NEXT: v_mov_b32_e32 v9, v3
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -2858,51 +2747,52 @@ define void @v_shuffle_v3i64_v4i64__7_6_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v4i64__7_6_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v10
-; GFX900-NEXT: v_mov_b32_e32 v7, v11
-; GFX900-NEXT: global_store_dwordx2 v12, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v14, v[8:9], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v8, v4
+; GFX900-NEXT: v_mov_b32_e32 v9, v5
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v4i64__7_6_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v10
-; GFX90A-NEXT: v_mov_b32_e32 v7, v11
-; GFX90A-NEXT: global_store_dwordx2 v12, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v14, v[8:9], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v8, v4
+; GFX90A-NEXT: v_mov_b32_e32 v9, v5
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_6_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: global_store_dwordx2 v14, v[8:9], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v6, v10
-; GFX942-NEXT: v_mov_b32_e32 v7, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v4
+; GFX942-NEXT: v_mov_b32_e32 v9, v5
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -3096,42 +2986,42 @@ define void @v_shuffle_v3i64_v4i64__3_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v4i64__3_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v4
+; GFX900-NEXT: v_mov_b32_e32 v9, v5
+; GFX900-NEXT: global_store_dwordx2 v10, v[4:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v4i64__3_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v4
+; GFX90A-NEXT: v_mov_b32_e32 v9, v5
+; GFX90A-NEXT: global_store_dwordx2 v10, v[4:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v4i64__3_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v10, v[4:5], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v4
+; GFX942-NEXT: v_mov_b32_e32 v9, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -3303,16 +3193,14 @@ define void @v_shuffle_v3i64_v4i64__7_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, v12
-; GFX900-NEXT: v_mov_b32_e32 v3, v13
-; GFX900-NEXT: global_store_dwordx2 v14, v[4:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v12, v[8:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3320,16 +3208,14 @@ define void @v_shuffle_v3i64_v4i64__7_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, v12
-; GFX90A-NEXT: v_mov_b32_e32 v3, v13
-; GFX90A-NEXT: global_store_dwordx2 v14, v[4:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v12, v[8:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3337,16 +3223,14 @@ define void @v_shuffle_v3i64_v4i64__7_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v12
-; GFX942-NEXT: v_mov_b32_e32 v3, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v12, v[8:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -3361,16 +3245,14 @@ define void @v_shuffle_v3i64_v4i64__7_u_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v12
-; GFX900-NEXT: v_mov_b32_e32 v1, v13
-; GFX900-NEXT: global_store_dwordx2 v14, v[4:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v14, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v12, v[8:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3378,16 +3260,14 @@ define void @v_shuffle_v3i64_v4i64__7_u_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v12
-; GFX90A-NEXT: v_mov_b32_e32 v1, v13
-; GFX90A-NEXT: global_store_dwordx2 v14, v[4:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v14, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v12, v[8:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3395,16 +3275,14 @@ define void @v_shuffle_v3i64_v4i64__7_u_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v0, v12
-; GFX942-NEXT: v_mov_b32_e32 v1, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v12, v[8:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -3418,57 +3296,45 @@ define void @v_shuffle_v3i64_v4i64__7_0_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v4i64__7_0_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v8, v6
-; GFX900-NEXT: v_mov_b32_e32 v9, v7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, v0
-; GFX900-NEXT: v_mov_b32_e32 v11, v1
-; GFX900-NEXT: global_store_dwordx2 v12, v[4:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v16, v[12:13], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v4i64__7_0_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v14, v[4:5], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v2, v12
-; GFX90A-NEXT: v_mov_b32_e32 v3, v13
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v16, v[12:13], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_0_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v2, v12
-; GFX942-NEXT: v_mov_b32_e32 v3, v13
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v16, v[12:13], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -3482,51 +3348,46 @@ define void @v_shuffle_v3i64_v4i64__7_1_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v4i64__7_1_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
-; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v12
-; GFX900-NEXT: v_mov_b32_e32 v1, v13
-; GFX900-NEXT: global_store_dwordx2 v14, v[4:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v14, v[0:3], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx2 v14, v[10:11], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v4i64__7_1_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
-; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v12
-; GFX90A-NEXT: v_mov_b32_e32 v1, v13
-; GFX90A-NEXT: global_store_dwordx2 v14, v[4:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v14, v[0:3], s[16:17]
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: global_store_dwordx2 v14, v[10:11], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_1_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
-; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v0, v12
-; GFX942-NEXT: v_mov_b32_e32 v1, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx2 v14, v[10:11], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -3540,51 +3401,51 @@ define void @v_shuffle_v3i64_v4i64__7_3_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v4i64__7_3_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx2 v12, v[8:9], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v8, v10
+; GFX900-NEXT: v_mov_b32_e32 v9, v11
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v16, v[4:5], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v4, v14
-; GFX900-NEXT: v_mov_b32_e32 v5, v15
-; GFX900-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v4i64__7_3_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: global_store_dwordx2 v12, v[8:9], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v8, v10
+; GFX90A-NEXT: v_mov_b32_e32 v9, v11
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v16, v[4:5], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v4, v14
-; GFX90A-NEXT: v_mov_b32_e32 v5, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_3_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx2 v12, v[8:9], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v10
+; GFX942-NEXT: v_mov_b32_e32 v9, v11
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v16, v[4:5], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v4, v14
-; GFX942-NEXT: v_mov_b32_e32 v5, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -3598,57 +3459,52 @@ define void @v_shuffle_v3i64_v4i64__7_4_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v4i64__7_4_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v12
-; GFX900-NEXT: v_mov_b32_e32 v1, v13
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: global_store_dwordx2 v14, v[4:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v14, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v12, v[8:9], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v8, v0
+; GFX900-NEXT: v_mov_b32_e32 v9, v1
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v4i64__7_4_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v12
-; GFX90A-NEXT: v_mov_b32_e32 v1, v13
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: global_store_dwordx2 v14, v[4:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v14, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v12, v[8:9], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v8, v0
+; GFX90A-NEXT: v_mov_b32_e32 v9, v1
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_4_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: global_store_dwordx2 v12, v[8:9], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v0, v12
-; GFX942-NEXT: v_mov_b32_e32 v1, v13
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: global_store_dwordx4 v14, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v0
+; GFX942-NEXT: v_mov_b32_e32 v9, v1
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -3662,51 +3518,52 @@ define void @v_shuffle_v3i64_v4i64__7_5_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v4i64__7_5_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v12
-; GFX900-NEXT: v_mov_b32_e32 v7, v13
-; GFX900-NEXT: global_store_dwordx2 v14, v[4:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v12, v[8:9], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v8, v2
+; GFX900-NEXT: v_mov_b32_e32 v9, v3
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v4i64__7_5_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v12
-; GFX90A-NEXT: v_mov_b32_e32 v7, v13
-; GFX90A-NEXT: global_store_dwordx2 v14, v[4:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v12, v[8:9], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v8, v2
+; GFX90A-NEXT: v_mov_b32_e32 v9, v3
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_5_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: global_store_dwordx2 v12, v[8:9], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v6, v12
-; GFX942-NEXT: v_mov_b32_e32 v7, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v2
+; GFX942-NEXT: v_mov_b32_e32 v9, v3
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -3720,51 +3577,52 @@ define void @v_shuffle_v3i64_v4i64__7_6_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v4i64__7_6_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
-; GFX900-NEXT: v_mov_b32_e32 v8, v12
-; GFX900-NEXT: v_mov_b32_e32 v9, v13
-; GFX900-NEXT: global_store_dwordx2 v14, v[4:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v12, v[8:9], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v8, v4
+; GFX900-NEXT: v_mov_b32_e32 v9, v5
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v4i64__7_6_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v12
-; GFX90A-NEXT: v_mov_b32_e32 v9, v13
-; GFX90A-NEXT: global_store_dwordx2 v14, v[4:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v12, v[8:9], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v8, v4
+; GFX90A-NEXT: v_mov_b32_e32 v9, v5
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_6_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: global_store_dwordx2 v12, v[8:9], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v8, v12
-; GFX942-NEXT: v_mov_b32_e32 v9, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v4
+; GFX942-NEXT: v_mov_b32_e32 v9, v5
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -3958,42 +3816,42 @@ define void @v_shuffle_v3i64_v4i64__3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v4i64__3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[6:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx2 v10, v[6:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v4i64__3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx2 v8, v[6:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx2 v10, v[6:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v4i64__3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v10, v[6:7], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -4165,16 +4023,14 @@ define void @v_shuffle_v3i64_v4i64__7_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v14
-; GFX900-NEXT: v_mov_b32_e32 v5, v15
-; GFX900-NEXT: global_store_dwordx2 v16, v[6:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v10, v[8:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4182,16 +4038,14 @@ define void @v_shuffle_v3i64_v4i64__7_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v14
-; GFX90A-NEXT: v_mov_b32_e32 v5, v15
-; GFX90A-NEXT: global_store_dwordx2 v16, v[6:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v10, v[8:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4199,16 +4053,14 @@ define void @v_shuffle_v3i64_v4i64__7_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v4, v14
-; GFX942-NEXT: v_mov_b32_e32 v5, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v10, v[8:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -4223,16 +4075,14 @@ define void @v_shuffle_v3i64_v4i64__7_u_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v14
-; GFX900-NEXT: v_mov_b32_e32 v1, v15
-; GFX900-NEXT: global_store_dwordx2 v16, v[6:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v10, v[8:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4240,16 +4090,14 @@ define void @v_shuffle_v3i64_v4i64__7_u_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v14
-; GFX90A-NEXT: v_mov_b32_e32 v1, v15
-; GFX90A-NEXT: global_store_dwordx2 v16, v[6:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v10, v[8:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4257,16 +4105,14 @@ define void @v_shuffle_v3i64_v4i64__7_u_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v0, v14
-; GFX942-NEXT: v_mov_b32_e32 v1, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v10, v[8:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -4280,57 +4126,45 @@ define void @v_shuffle_v3i64_v4i64__7_0_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v4i64__7_0_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v8, v6
-; GFX900-NEXT: v_mov_b32_e32 v9, v7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, v0
-; GFX900-NEXT: v_mov_b32_e32 v11, v1
-; GFX900-NEXT: global_store_dwordx2 v12, v[6:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v16, v[14:15], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v4i64__7_0_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
-; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, v14
-; GFX90A-NEXT: v_mov_b32_e32 v3, v15
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx2 v16, v[6:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[2:5], s[16:17]
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: global_store_dwordx2 v16, v[14:15], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_0_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
-; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v2, v14
-; GFX942-NEXT: v_mov_b32_e32 v3, v15
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx2 v16, v[14:15], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -4344,51 +4178,46 @@ define void @v_shuffle_v3i64_v4i64__7_1_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v4i64__7_1_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v14
-; GFX900-NEXT: v_mov_b32_e32 v1, v15
-; GFX900-NEXT: global_store_dwordx2 v16, v[6:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v14, v[12:13], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v4i64__7_1_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v14
-; GFX90A-NEXT: v_mov_b32_e32 v1, v15
-; GFX90A-NEXT: global_store_dwordx2 v16, v[6:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v14, v[12:13], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_1_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v0, v14
-; GFX942-NEXT: v_mov_b32_e32 v1, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v14, v[12:13], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -4402,51 +4231,46 @@ define void @v_shuffle_v3i64_v4i64__7_2_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v4i64__7_2_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, v14
-; GFX900-NEXT: v_mov_b32_e32 v3, v15
-; GFX900-NEXT: global_store_dwordx2 v16, v[6:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v12, v[10:11], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v4i64__7_2_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, v14
-; GFX90A-NEXT: v_mov_b32_e32 v3, v15
-; GFX90A-NEXT: global_store_dwordx2 v16, v[6:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v12, v[10:11], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_2_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v14
-; GFX942-NEXT: v_mov_b32_e32 v3, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v12, v[10:11], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -4460,57 +4284,52 @@ define void @v_shuffle_v3i64_v4i64__7_4_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v4i64__7_4_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v14
-; GFX900-NEXT: v_mov_b32_e32 v1, v15
-; GFX900-NEXT: v_mov_b32_e32 v2, v8
-; GFX900-NEXT: v_mov_b32_e32 v3, v9
-; GFX900-NEXT: global_store_dwordx2 v16, v[6:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v10, v[8:9], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v8, v0
+; GFX900-NEXT: v_mov_b32_e32 v9, v1
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v4i64__7_4_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v14
-; GFX90A-NEXT: v_mov_b32_e32 v1, v15
-; GFX90A-NEXT: v_mov_b32_e32 v2, v8
-; GFX90A-NEXT: v_mov_b32_e32 v3, v9
-; GFX90A-NEXT: global_store_dwordx2 v16, v[6:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v10, v[8:9], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v8, v0
+; GFX90A-NEXT: v_mov_b32_e32 v9, v1
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_4_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: global_store_dwordx2 v10, v[8:9], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v0, v14
-; GFX942-NEXT: v_mov_b32_e32 v1, v15
-; GFX942-NEXT: v_mov_b32_e32 v2, v8
-; GFX942-NEXT: v_mov_b32_e32 v3, v9
-; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v0
+; GFX942-NEXT: v_mov_b32_e32 v9, v1
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -4524,51 +4343,52 @@ define void @v_shuffle_v3i64_v4i64__7_5_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v4i64__7_5_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
-; GFX900-NEXT: v_mov_b32_e32 v8, v14
-; GFX900-NEXT: v_mov_b32_e32 v9, v15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v16, v[6:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v10, v[8:9], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v8, v2
+; GFX900-NEXT: v_mov_b32_e32 v9, v3
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v4i64__7_5_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v14
-; GFX90A-NEXT: v_mov_b32_e32 v9, v15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v16, v[6:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v10, v[8:9], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v8, v2
+; GFX90A-NEXT: v_mov_b32_e32 v9, v3
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_5_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v14
-; GFX942-NEXT: v_mov_b32_e32 v9, v15
+; GFX942-NEXT: global_store_dwordx2 v10, v[8:9], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v2
+; GFX942-NEXT: v_mov_b32_e32 v9, v3
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -4582,51 +4402,52 @@ define void @v_shuffle_v3i64_v4i64__7_6_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v4i64__7_6_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
-; GFX900-NEXT: v_mov_b32_e32 v10, v14
-; GFX900-NEXT: v_mov_b32_e32 v11, v15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v16, v[6:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[10:13], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v10, v[8:9], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v8, v4
+; GFX900-NEXT: v_mov_b32_e32 v9, v5
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v4i64__7_6_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v10, v14
-; GFX90A-NEXT: v_mov_b32_e32 v11, v15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v16, v[6:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[10:13], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v10, v[8:9], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v8, v4
+; GFX90A-NEXT: v_mov_b32_e32 v9, v5
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_6_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v14
-; GFX942-NEXT: v_mov_b32_e32 v11, v15
+; GFX942-NEXT: global_store_dwordx2 v10, v[8:9], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v4
+; GFX942-NEXT: v_mov_b32_e32 v9, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -4771,39 +4592,33 @@ define void @v_shuffle_v3i64_v4i64__3_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v4i64__3_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v4i64__3_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v4i64__3_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -4925,48 +4740,42 @@ define void @v_shuffle_v3i64_v4i64__7_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v4i64__7_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v0
+; GFX900-NEXT: v_mov_b32_e32 v9, v1
+; GFX900-NEXT: global_store_dwordx2 v10, v[0:1], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v4i64__7_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v0
+; GFX90A-NEXT: v_mov_b32_e32 v9, v1
+; GFX90A-NEXT: global_store_dwordx2 v10, v[0:1], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v0
+; GFX942-NEXT: v_mov_b32_e32 v9, v1
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -4985,9 +4794,7 @@ define void @v_shuffle_v3i64_v4i64__7_u_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4999,9 +4806,7 @@ define void @v_shuffle_v3i64_v4i64__7_u_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5013,9 +4818,7 @@ define void @v_shuffle_v3i64_v4i64__7_u_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -5029,57 +4832,45 @@ define void @v_shuffle_v3i64_v4i64__7_0_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v4i64__7_0_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v12, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v8, v6
-; GFX900-NEXT: v_mov_b32_e32 v9, v7
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, v0
-; GFX900-NEXT: v_mov_b32_e32 v11, v1
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v16, v[0:1], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v4i64__7_0_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:9]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v2, v8
-; GFX90A-NEXT: v_mov_b32_e32 v3, v9
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v16, v[0:1], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_0_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:9]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v8
-; GFX942-NEXT: v_mov_b32_e32 v3, v9
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v16, v[0:1], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -5094,16 +4885,14 @@ define void @v_shuffle_v3i64_v4i64__7_1_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v10
-; GFX900-NEXT: v_mov_b32_e32 v1, v11
-; GFX900-NEXT: global_store_dwordx2 v12, v[4:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v14, v[0:1], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5111,16 +4900,14 @@ define void @v_shuffle_v3i64_v4i64__7_1_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v10
-; GFX90A-NEXT: v_mov_b32_e32 v1, v11
-; GFX90A-NEXT: global_store_dwordx2 v12, v[4:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v14, v[0:1], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5128,16 +4915,14 @@ define void @v_shuffle_v3i64_v4i64__7_1_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v12, v[4:5], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v0, v10
-; GFX942-NEXT: v_mov_b32_e32 v1, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v14, v[0:1], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -5152,16 +4937,14 @@ define void @v_shuffle_v3i64_v4i64__7_2_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, v12
-; GFX900-NEXT: v_mov_b32_e32 v3, v13
-; GFX900-NEXT: global_store_dwordx2 v14, v[6:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v12, v[0:1], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5169,16 +4952,14 @@ define void @v_shuffle_v3i64_v4i64__7_2_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, v12
-; GFX90A-NEXT: v_mov_b32_e32 v3, v13
-; GFX90A-NEXT: global_store_dwordx2 v14, v[6:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v12, v[0:1], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5186,16 +4967,14 @@ define void @v_shuffle_v3i64_v4i64__7_2_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v14, v[6:7], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v12
-; GFX942-NEXT: v_mov_b32_e32 v3, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v12, v[0:1], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -5210,16 +4989,14 @@ define void @v_shuffle_v3i64_v4i64__7_3_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v14
-; GFX900-NEXT: v_mov_b32_e32 v5, v15
-; GFX900-NEXT: global_store_dwordx2 v16, v[8:9], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v10, v[0:1], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5227,16 +5004,14 @@ define void @v_shuffle_v3i64_v4i64__7_3_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v14
-; GFX90A-NEXT: v_mov_b32_e32 v5, v15
-; GFX90A-NEXT: global_store_dwordx2 v16, v[8:9], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v10, v[0:1], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5244,16 +5019,14 @@ define void @v_shuffle_v3i64_v4i64__7_3_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v16, v[8:9], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v4, v14
-; GFX942-NEXT: v_mov_b32_e32 v5, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -5267,42 +5040,42 @@ define void @v_shuffle_v3i64_v4i64__7_5_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v4i64__7_5_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v2
+; GFX900-NEXT: v_mov_b32_e32 v9, v3
+; GFX900-NEXT: global_store_dwordx2 v10, v[0:1], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v4i64__7_5_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v2
+; GFX90A-NEXT: v_mov_b32_e32 v9, v3
+; GFX90A-NEXT: global_store_dwordx2 v10, v[0:1], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_5_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v2
+; GFX942-NEXT: v_mov_b32_e32 v9, v3
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -5316,42 +5089,42 @@ define void @v_shuffle_v3i64_v4i64__7_6_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v4i64__7_6_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v4
+; GFX900-NEXT: v_mov_b32_e32 v9, v5
+; GFX900-NEXT: global_store_dwordx2 v10, v[0:1], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v4i64__7_6_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v4
+; GFX90A-NEXT: v_mov_b32_e32 v9, v5
+; GFX90A-NEXT: global_store_dwordx2 v10, v[0:1], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_6_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v4
+; GFX942-NEXT: v_mov_b32_e32 v9, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -5586,13 +5359,13 @@ define void @v_shuffle_v3i64_v4i64__3_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v10
+; GFX900-NEXT: v_mov_b32_e32 v9, v11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, v6
-; GFX900-NEXT: v_mov_b32_e32 v9, v7
; GFX900-NEXT: global_store_dwordx2 v16, v[10:11], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5603,13 +5376,13 @@ define void @v_shuffle_v3i64_v4i64__3_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v16, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v10
+; GFX90A-NEXT: v_mov_b32_e32 v9, v11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, v6
-; GFX90A-NEXT: v_mov_b32_e32 v9, v7
; GFX90A-NEXT: global_store_dwordx2 v16, v[10:11], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5620,13 +5393,13 @@ define void @v_shuffle_v3i64_v4i64__3_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v10
+; GFX942-NEXT: v_mov_b32_e32 v9, v11
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: global_store_dwordx2 v16, v[10:11], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v8, v6
-; GFX942-NEXT: v_mov_b32_e32 v9, v7
-; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -5781,42 +5554,42 @@ define void @v_shuffle_v3i64_v4i64__7_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v4i64__7_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v2
+; GFX900-NEXT: v_mov_b32_e32 v9, v3
+; GFX900-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v4i64__7_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v2
+; GFX90A-NEXT: v_mov_b32_e32 v9, v3
+; GFX90A-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v2
+; GFX942-NEXT: v_mov_b32_e32 v9, v3
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -5830,42 +5603,36 @@ define void @v_shuffle_v3i64_v4i64__7_u_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v4i64__7_u_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
; GFX900-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v4i64__7_u_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
; GFX90A-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_u_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -5879,57 +5646,45 @@ define void @v_shuffle_v3i64_v4i64__7_0_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v4i64__7_0_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v12, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v8, v6
-; GFX900-NEXT: v_mov_b32_e32 v9, v7
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, v0
-; GFX900-NEXT: v_mov_b32_e32 v11, v1
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v16, v[2:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v4i64__7_0_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:9]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v10, v[4:5], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v2, v8
-; GFX90A-NEXT: v_mov_b32_e32 v3, v9
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v16, v[2:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_0_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:9]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v10, v[4:5], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v8
-; GFX942-NEXT: v_mov_b32_e32 v3, v9
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v16, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -5944,16 +5699,14 @@ define void @v_shuffle_v3i64_v4i64__7_1_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v10
-; GFX900-NEXT: v_mov_b32_e32 v1, v11
-; GFX900-NEXT: global_store_dwordx2 v12, v[6:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v14, v[2:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5961,16 +5714,14 @@ define void @v_shuffle_v3i64_v4i64__7_1_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v10
-; GFX90A-NEXT: v_mov_b32_e32 v1, v11
-; GFX90A-NEXT: global_store_dwordx2 v12, v[6:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v14, v[2:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5978,16 +5729,14 @@ define void @v_shuffle_v3i64_v4i64__7_1_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v12, v[6:7], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v0, v10
-; GFX942-NEXT: v_mov_b32_e32 v1, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v14, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -6002,16 +5751,14 @@ define void @v_shuffle_v3i64_v4i64__7_2_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, v12
-; GFX900-NEXT: v_mov_b32_e32 v3, v13
-; GFX900-NEXT: global_store_dwordx2 v14, v[8:9], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v12, v[2:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6019,16 +5766,14 @@ define void @v_shuffle_v3i64_v4i64__7_2_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, v12
-; GFX90A-NEXT: v_mov_b32_e32 v3, v13
-; GFX90A-NEXT: global_store_dwordx2 v14, v[8:9], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v12, v[2:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6036,16 +5781,14 @@ define void @v_shuffle_v3i64_v4i64__7_2_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v14, v[8:9], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v12
-; GFX942-NEXT: v_mov_b32_e32 v3, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -6060,16 +5803,14 @@ define void @v_shuffle_v3i64_v4i64__7_3_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v14
-; GFX900-NEXT: v_mov_b32_e32 v5, v15
-; GFX900-NEXT: global_store_dwordx2 v16, v[10:11], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6077,16 +5818,14 @@ define void @v_shuffle_v3i64_v4i64__7_3_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v14
-; GFX90A-NEXT: v_mov_b32_e32 v5, v15
-; GFX90A-NEXT: global_store_dwordx2 v16, v[10:11], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6094,16 +5833,14 @@ define void @v_shuffle_v3i64_v4i64__7_3_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v16, v[10:11], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v4, v14
-; GFX942-NEXT: v_mov_b32_e32 v5, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -6117,48 +5854,42 @@ define void @v_shuffle_v3i64_v4i64__7_4_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v4i64__7_4_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v0
+; GFX900-NEXT: v_mov_b32_e32 v9, v1
+; GFX900-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v4i64__7_4_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v0
+; GFX90A-NEXT: v_mov_b32_e32 v9, v1
+; GFX90A-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_4_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v0
+; GFX942-NEXT: v_mov_b32_e32 v9, v1
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -6172,42 +5903,42 @@ define void @v_shuffle_v3i64_v4i64__7_6_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v4i64__7_6_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v4
+; GFX900-NEXT: v_mov_b32_e32 v9, v5
+; GFX900-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v4i64__7_6_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v4
+; GFX90A-NEXT: v_mov_b32_e32 v9, v5
+; GFX90A-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_6_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v4
+; GFX942-NEXT: v_mov_b32_e32 v9, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -6442,13 +6173,13 @@ define void @v_shuffle_v3i64_v4i64__3_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v12
+; GFX900-NEXT: v_mov_b32_e32 v9, v13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, v6
-; GFX900-NEXT: v_mov_b32_e32 v11, v7
; GFX900-NEXT: global_store_dwordx2 v16, v[12:13], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[10:13], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6459,13 +6190,13 @@ define void @v_shuffle_v3i64_v4i64__3_6_6(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v16, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v12
+; GFX90A-NEXT: v_mov_b32_e32 v9, v13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, v6
-; GFX90A-NEXT: v_mov_b32_e32 v11, v7
; GFX90A-NEXT: global_store_dwordx2 v16, v[12:13], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[10:13], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6476,13 +6207,13 @@ define void @v_shuffle_v3i64_v4i64__3_6_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v12
+; GFX942-NEXT: v_mov_b32_e32 v9, v13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: global_store_dwordx2 v16, v[12:13], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v10, v6
-; GFX942-NEXT: v_mov_b32_e32 v11, v7
-; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -6637,42 +6368,42 @@ define void @v_shuffle_v3i64_v4i64__7_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v4i64__7_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v4
+; GFX900-NEXT: v_mov_b32_e32 v9, v5
+; GFX900-NEXT: global_store_dwordx2 v10, v[4:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v4i64__7_6_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v4
+; GFX90A-NEXT: v_mov_b32_e32 v9, v5
+; GFX90A-NEXT: global_store_dwordx2 v10, v[4:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v10, v[4:5], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v4
+; GFX942-NEXT: v_mov_b32_e32 v9, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -6686,42 +6417,36 @@ define void @v_shuffle_v3i64_v4i64__7_u_6(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v4i64__7_u_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
; GFX900-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v4i64__7_u_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
; GFX90A-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_u_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -6735,57 +6460,45 @@ define void @v_shuffle_v3i64_v4i64__7_0_6(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v4i64__7_0_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v12, v[4:5], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v8, v6
-; GFX900-NEXT: v_mov_b32_e32 v9, v7
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, v0
-; GFX900-NEXT: v_mov_b32_e32 v11, v1
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v16, v[4:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v4i64__7_0_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:9]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v8
-; GFX90A-NEXT: v_mov_b32_e32 v3, v9
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx2 v10, v[6:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v16, v[4:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_0_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:9]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v10, v[6:7], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v8
-; GFX942-NEXT: v_mov_b32_e32 v3, v9
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v16, v[4:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -6800,16 +6513,14 @@ define void @v_shuffle_v3i64_v4i64__7_1_6(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v10
-; GFX900-NEXT: v_mov_b32_e32 v1, v11
-; GFX900-NEXT: global_store_dwordx2 v12, v[8:9], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v14, v[4:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6817,16 +6528,14 @@ define void @v_shuffle_v3i64_v4i64__7_1_6(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v10
-; GFX90A-NEXT: v_mov_b32_e32 v1, v11
-; GFX90A-NEXT: global_store_dwordx2 v12, v[8:9], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v14, v[4:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6834,16 +6543,14 @@ define void @v_shuffle_v3i64_v4i64__7_1_6(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v12, v[8:9], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v0, v10
-; GFX942-NEXT: v_mov_b32_e32 v1, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -6858,16 +6565,14 @@ define void @v_shuffle_v3i64_v4i64__7_2_6(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, v12
-; GFX900-NEXT: v_mov_b32_e32 v3, v13
-; GFX900-NEXT: global_store_dwordx2 v14, v[10:11], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v12, v[4:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6875,16 +6580,14 @@ define void @v_shuffle_v3i64_v4i64__7_2_6(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, v12
-; GFX90A-NEXT: v_mov_b32_e32 v3, v13
-; GFX90A-NEXT: global_store_dwordx2 v14, v[10:11], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v12, v[4:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6892,16 +6595,14 @@ define void @v_shuffle_v3i64_v4i64__7_2_6(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v14, v[10:11], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v12
-; GFX942-NEXT: v_mov_b32_e32 v3, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v12, v[4:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -6916,16 +6617,14 @@ define void @v_shuffle_v3i64_v4i64__7_3_6(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v14
-; GFX900-NEXT: v_mov_b32_e32 v5, v15
-; GFX900-NEXT: global_store_dwordx2 v16, v[12:13], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v10, v[4:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6933,16 +6632,14 @@ define void @v_shuffle_v3i64_v4i64__7_3_6(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v14
-; GFX90A-NEXT: v_mov_b32_e32 v5, v15
-; GFX90A-NEXT: global_store_dwordx2 v16, v[12:13], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v10, v[4:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6950,16 +6647,14 @@ define void @v_shuffle_v3i64_v4i64__7_3_6(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v16, v[12:13], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v4, v14
-; GFX942-NEXT: v_mov_b32_e32 v5, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v10, v[4:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -6973,48 +6668,42 @@ define void @v_shuffle_v3i64_v4i64__7_4_6(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v4i64__7_4_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v0
+; GFX900-NEXT: v_mov_b32_e32 v9, v1
+; GFX900-NEXT: global_store_dwordx2 v10, v[4:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v4i64__7_4_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v0
+; GFX90A-NEXT: v_mov_b32_e32 v9, v1
+; GFX90A-NEXT: global_store_dwordx2 v10, v[4:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_4_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v10, v[4:5], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v0
+; GFX942-NEXT: v_mov_b32_e32 v9, v1
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -7028,42 +6717,42 @@ define void @v_shuffle_v3i64_v4i64__7_5_6(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v4i64__7_5_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v2
+; GFX900-NEXT: v_mov_b32_e32 v9, v3
+; GFX900-NEXT: global_store_dwordx2 v10, v[4:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v4i64__7_5_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v2
+; GFX90A-NEXT: v_mov_b32_e32 v9, v3
+; GFX90A-NEXT: global_store_dwordx2 v10, v[4:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_5_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v10, v[4:5], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v2
+; GFX942-NEXT: v_mov_b32_e32 v9, v3
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -7298,13 +6987,13 @@ define void @v_shuffle_v3i64_v4i64__3_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v14
+; GFX900-NEXT: v_mov_b32_e32 v9, v15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, v6
-; GFX900-NEXT: v_mov_b32_e32 v13, v7
; GFX900-NEXT: global_store_dwordx2 v16, v[14:15], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7315,13 +7004,13 @@ define void @v_shuffle_v3i64_v4i64__3_7_7(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v16, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v14
+; GFX90A-NEXT: v_mov_b32_e32 v9, v15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, v6
-; GFX90A-NEXT: v_mov_b32_e32 v13, v7
; GFX90A-NEXT: global_store_dwordx2 v16, v[14:15], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7332,13 +7021,13 @@ define void @v_shuffle_v3i64_v4i64__3_7_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v14
+; GFX942-NEXT: v_mov_b32_e32 v9, v15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: global_store_dwordx2 v16, v[14:15], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v12, v6
-; GFX942-NEXT: v_mov_b32_e32 v13, v7
-; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -7493,42 +7182,36 @@ define void @v_shuffle_v3i64_v4i64__7_u_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v4i64__7_u_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
; GFX900-NEXT: global_store_dwordx2 v8, v[6:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v4i64__7_u_7:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
; GFX90A-NEXT: global_store_dwordx2 v8, v[6:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_u_7:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -7542,57 +7225,45 @@ define void @v_shuffle_v3i64_v4i64__7_0_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v4i64__7_0_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v12, v[6:7], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v8, v6
-; GFX900-NEXT: v_mov_b32_e32 v9, v7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, v0
-; GFX900-NEXT: v_mov_b32_e32 v11, v1
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v16, v[6:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v4i64__7_0_7:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:9]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v8
-; GFX90A-NEXT: v_mov_b32_e32 v3, v9
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx2 v10, v[8:9], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v16, v[6:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_0_7:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:9]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v10, v[8:9], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v8
-; GFX942-NEXT: v_mov_b32_e32 v3, v9
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -7607,16 +7278,14 @@ define void @v_shuffle_v3i64_v4i64__7_1_7(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v10
-; GFX900-NEXT: v_mov_b32_e32 v1, v11
-; GFX900-NEXT: global_store_dwordx2 v12, v[10:11], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v14, v[6:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7624,16 +7293,14 @@ define void @v_shuffle_v3i64_v4i64__7_1_7(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v10
-; GFX90A-NEXT: v_mov_b32_e32 v1, v11
-; GFX90A-NEXT: global_store_dwordx2 v12, v[10:11], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v14, v[6:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7641,16 +7308,14 @@ define void @v_shuffle_v3i64_v4i64__7_1_7(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v12, v[10:11], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v0, v10
-; GFX942-NEXT: v_mov_b32_e32 v1, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v14, v[6:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -7665,16 +7330,14 @@ define void @v_shuffle_v3i64_v4i64__7_2_7(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, v12
-; GFX900-NEXT: v_mov_b32_e32 v3, v13
-; GFX900-NEXT: global_store_dwordx2 v14, v[12:13], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v12, v[6:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7682,16 +7345,14 @@ define void @v_shuffle_v3i64_v4i64__7_2_7(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, v12
-; GFX90A-NEXT: v_mov_b32_e32 v3, v13
-; GFX90A-NEXT: global_store_dwordx2 v14, v[12:13], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v12, v[6:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7699,16 +7360,14 @@ define void @v_shuffle_v3i64_v4i64__7_2_7(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v14, v[12:13], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v12
-; GFX942-NEXT: v_mov_b32_e32 v3, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v12, v[6:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -7723,16 +7382,14 @@ define void @v_shuffle_v3i64_v4i64__7_3_7(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v14
-; GFX900-NEXT: v_mov_b32_e32 v5, v15
-; GFX900-NEXT: global_store_dwordx2 v16, v[14:15], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v10, v[6:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7740,16 +7397,14 @@ define void @v_shuffle_v3i64_v4i64__7_3_7(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v14
-; GFX90A-NEXT: v_mov_b32_e32 v5, v15
-; GFX90A-NEXT: global_store_dwordx2 v16, v[14:15], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v10, v[6:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7757,16 +7412,14 @@ define void @v_shuffle_v3i64_v4i64__7_3_7(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v16, v[14:15], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v4, v14
-; GFX942-NEXT: v_mov_b32_e32 v5, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v10, v[6:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -7780,48 +7433,42 @@ define void @v_shuffle_v3i64_v4i64__7_4_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v4i64__7_4_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx2 v8, v[6:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v0
+; GFX900-NEXT: v_mov_b32_e32 v9, v1
+; GFX900-NEXT: global_store_dwordx2 v10, v[6:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v4i64__7_4_7:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx2 v8, v[6:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v0
+; GFX90A-NEXT: v_mov_b32_e32 v9, v1
+; GFX90A-NEXT: global_store_dwordx2 v10, v[6:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_4_7:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v10, v[6:7], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v0
+; GFX942-NEXT: v_mov_b32_e32 v9, v1
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -7835,42 +7482,42 @@ define void @v_shuffle_v3i64_v4i64__7_5_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v4i64__7_5_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[6:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v2
+; GFX900-NEXT: v_mov_b32_e32 v9, v3
+; GFX900-NEXT: global_store_dwordx2 v10, v[6:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v4i64__7_5_7:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx2 v8, v[6:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v2
+; GFX90A-NEXT: v_mov_b32_e32 v9, v3
+; GFX90A-NEXT: global_store_dwordx2 v10, v[6:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_5_7:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v10, v[6:7], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v2
+; GFX942-NEXT: v_mov_b32_e32 v9, v3
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -7884,42 +7531,42 @@ define void @v_shuffle_v3i64_v4i64__7_6_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3i64_v4i64__7_6_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[6:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v4
+; GFX900-NEXT: v_mov_b32_e32 v9, v5
+; GFX900-NEXT: global_store_dwordx2 v10, v[6:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3i64_v4i64__7_6_7:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: global_store_dwordx2 v8, v[6:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v4
+; GFX90A-NEXT: v_mov_b32_e32 v9, v5
+; GFX90A-NEXT: global_store_dwordx2 v10, v[6:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_6_7:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v10, v[6:7], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v4
+; GFX942-NEXT: v_mov_b32_e32 v9, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -8055,10 +7702,9 @@ define void @s_shuffle_v3i64_v4i64__2_u_u() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8201,10 +7847,9 @@ define void @s_shuffle_v3i64_v4i64__6_u_u() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8477,15 +8122,14 @@ define void @s_shuffle_v3i64_v4i64__7_3_u() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8622,134 +8266,62 @@ define void @s_shuffle_v3i64_v4i64__7_6_u() {
}
define void @s_shuffle_v3i64_v4i64__7_7_u() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_7_u:
+; GFX9-LABEL: s_shuffle_v3i64_v4i64__7_7_u:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 7, i32 poison>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__7_7_0() {
+; GFX9-LABEL: s_shuffle_v3i64_v4i64__7_7_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 7, i32 0>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__7_7_1() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_7_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_7_u:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_7_u:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 7, i32 poison>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__7_7_0() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_7_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_7_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_7_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 7, i32 0>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__7_7_1() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_7_1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -8759,17 +8331,15 @@ define void @s_shuffle_v3i64_v4i64__7_7_1() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -8838,13 +8408,12 @@ define void @s_shuffle_v3i64_v4i64__7_7_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b32 s8, s10
+; GFX942-NEXT: s_mov_b32 s9, s11
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8899,15 +8468,13 @@ define void @s_shuffle_v3i64_v4i64__7_7_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s12, s14
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b32 s8, s10
+; GFX942-NEXT: s_mov_b32 s9, s11
+; GFX942-NEXT: s_mov_b32 s13, s15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8920,52 +8487,20 @@ define void @s_shuffle_v3i64_v4i64__7_7_3() {
}
define void @s_shuffle_v3i64_v4i64__7_7_4() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_7_4:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_7_4:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_7_4:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v3i64_v4i64__7_7_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s12, s4
+; GFX9-NEXT: s_mov_b32 s13, s5
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 7, i32 4>
@@ -8974,1103 +8509,58 @@ define void @s_shuffle_v3i64_v4i64__7_7_4() {
}
define void @s_shuffle_v3i64_v4i64__7_7_5() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_7_5:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_7_5:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_7_5:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 7, i32 5>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__7_7_6() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__7_7_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 7, i32 6>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__7_7_7() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_7_7:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_7_7:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_7_7:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 7, i32 7>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__u_0_0() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__u_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__u_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__u_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 poison, i32 0, i32 0>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__0_0_0() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__0_0_0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s8
-; GFX9-NEXT: s_mov_b32 s11, s9
-; GFX9-NEXT: s_mov_b32 s12, s8
-; GFX9-NEXT: s_mov_b32 s13, s9
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> zeroinitializer
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__1_0_0() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__1_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__1_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__1_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 1, i32 0, i32 0>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__2_0_0() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__2_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__2_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__2_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 2, i32 0, i32 0>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__3_0_0() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__3_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__3_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__3_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 3, i32 0, i32 0>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__4_0_0() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__4_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__4_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__4_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 4, i32 0, i32 0>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__5_0_0() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__5_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__5_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__5_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 5, i32 0, i32 0>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__6_0_0() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__6_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__6_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__6_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 6, i32 0, i32 0>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__7_0_0() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 0, i32 0>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__7_u_0() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_u_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_u_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_u_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 poison, i32 0>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__7_1_0() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_1_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s6
-; GFX900-NEXT: s_mov_b32 s11, s7
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_1_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s6
-; GFX90A-NEXT: s_mov_b32 s11, s7
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_1_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 1, i32 0>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__7_2_0() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_2_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_2_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_2_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 2, i32 0>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__7_3_0() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_3_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_3_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_3_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 3, i32 0>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__7_4_0() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_4_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_4_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_4_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 4, i32 0>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__7_5_0() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_5_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_5_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_5_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 5, i32 0>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__7_6_0() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_6_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_6_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_6_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v3i64_v4i64__7_7_5:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s12, s6
+; GFX9-NEXT: s_mov_b32 s13, s7
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 6, i32 0>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 7, i32 5>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__u_1_1() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__u_1_1:
+define void @s_shuffle_v3i64_v4i64__7_7_6() {
+; GFX9-LABEL: s_shuffle_v3i64_v4i64__7_7_6:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; def s[8:15]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: s_mov_b32 s8, s14
+; GFX9-NEXT: s_mov_b32 s9, s15
+; GFX9-NEXT: s_mov_b32 s10, s14
+; GFX9-NEXT: s_mov_b32 s11, s15
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:13]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 poison, i32 1, i32 1>
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 7, i32 6>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__0_1_1() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__0_1_1:
+define void @s_shuffle_v3i64_v4i64__7_7_7() {
+; GFX9-LABEL: s_shuffle_v3i64_v4i64__7_7_7:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ; def s[4:11]
; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
; GFX9-NEXT: s_mov_b32 s12, s10
; GFX9-NEXT: s_mov_b32 s13, s11
; GFX9-NEXT: ;;#ASMSTART
@@ -10078,473 +8568,397 @@ define void @s_shuffle_v3i64_v4i64__0_1_1() {
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 0, i32 1, i32 1>
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 7, i32 7>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__1_1_1() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__1_1_1:
+define void @s_shuffle_v3i64_v4i64__u_0_0() {
+; GFX9-LABEL: s_shuffle_v3i64_v4i64__u_0_0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ; def s[12:19]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:13]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 1, i32 1, i32 1>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 poison, i32 0, i32 0>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__2_1_1() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__2_1_1:
+define void @s_shuffle_v3i64_v4i64__0_0_0() {
+; GFX9-LABEL: s_shuffle_v3i64_v4i64__0_0_0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; def s[8:15]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s12, s8
+; GFX9-NEXT: s_mov_b32 s13, s9
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:13]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 2, i32 1, i32 1>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> zeroinitializer
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__3_1_1() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__3_1_1:
+define void @s_shuffle_v3i64_v4i64__1_0_0() {
+; GFX9-LABEL: s_shuffle_v3i64_v4i64__1_0_0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ; def s[12:19]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_mov_b32 s8, s14
; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:13]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 3, i32 1, i32 1>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 1, i32 0, i32 0>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__4_1_1() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__4_1_1:
+define void @s_shuffle_v3i64_v4i64__2_0_0() {
+; GFX9-LABEL: s_shuffle_v3i64_v4i64__2_0_0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ; def s[4:11]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: s_mov_b32 s10, s4
+; GFX9-NEXT: s_mov_b32 s11, s5
+; GFX9-NEXT: s_mov_b32 s12, s4
+; GFX9-NEXT: s_mov_b32 s13, s5
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:13]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 4, i32 1, i32 1>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 2, i32 0, i32 0>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__5_1_1() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__5_1_1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s12, s10
-; GFX900-NEXT: s_mov_b32 s13, s11
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__5_1_1:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s12, s10
-; GFX90A-NEXT: s_mov_b32 s13, s11
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__5_1_1:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+define void @s_shuffle_v3i64_v4i64__3_0_0() {
+; GFX9-LABEL: s_shuffle_v3i64_v4i64__3_0_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s18
+; GFX9-NEXT: s_mov_b32 s9, s19
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 5, i32 1, i32 1>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 3, i32 0, i32 0>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__6_1_1() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__6_1_1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s12, s10
-; GFX900-NEXT: s_mov_b32 s13, s11
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__6_1_1:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s12, s10
-; GFX90A-NEXT: s_mov_b32 s13, s11
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__6_1_1:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+define void @s_shuffle_v3i64_v4i64__4_0_0() {
+; GFX9-LABEL: s_shuffle_v3i64_v4i64__4_0_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 6, i32 1, i32 1>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 4, i32 0, i32 0>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_1_1() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_1_1:
+define void @s_shuffle_v3i64_v4i64__5_0_0() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__5_0_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s12, s10
-; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_1_1:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__5_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s12, s10
-; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_1_1:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__5_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b32 s8, s2
+; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b32 s10, s12
+; GFX942-NEXT: s_mov_b32 s11, s13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 1, i32 1>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 5, i32 0, i32 0>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_u_1() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_u_1:
+define void @s_shuffle_v3i64_v4i64__6_0_0() {
+; GFX9-LABEL: s_shuffle_v3i64_v4i64__6_0_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 6, i32 0, i32 0>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__7_0_0() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_0_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_u_1:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_u_1:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s10, s12
+; GFX942-NEXT: s_mov_b32 s11, s13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 poison, i32 1>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 0, i32 0>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_0_1() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_0_1:
+define void @s_shuffle_v3i64_v4i64__7_u_0() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_u_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_0_1:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_u_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_0_1:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_u_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 0, i32 1>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 poison, i32 0>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_2_1() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_2_1:
+define void @s_shuffle_v3i64_v4i64__7_1_0() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_1_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_2_1:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_1_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_2_1:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_1_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s10, s14
+; GFX942-NEXT: s_mov_b32 s11, s15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 2, i32 1>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 1, i32 0>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_3_1() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_3_1:
+define void @s_shuffle_v3i64_v4i64__7_2_0() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_2_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -10553,16 +8967,16 @@ define void @s_shuffle_v3i64_v4i64__7_3_1() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_3_1:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_2_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -10571,63 +8985,84 @@ define void @s_shuffle_v3i64_v4i64__7_3_1() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_3_1:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_2_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s10, s16
+; GFX942-NEXT: s_mov_b32 s11, s17
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 3, i32 1>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 2, i32 0>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_4_1() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_4_1:
+define void @s_shuffle_v3i64_v4i64__7_3_0() {
+; GFX9-LABEL: s_shuffle_v3i64_v4i64__7_3_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s18
+; GFX9-NEXT: s_mov_b32 s9, s19
+; GFX9-NEXT: s_mov_b32 s12, s4
+; GFX9-NEXT: s_mov_b32 s13, s5
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 3, i32 0>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__7_4_0() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_4_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_4_1:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_4_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -10636,45 +9071,41 @@ define void @s_shuffle_v3i64_v4i64__7_4_1() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_4_1:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_4_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b32 s11, s1
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 4, i32 1>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 4, i32 0>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_5_1() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_5_1:
+define void @s_shuffle_v3i64_v4i64__7_5_0() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_5_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -10685,14 +9116,14 @@ define void @s_shuffle_v3i64_v4i64__7_5_1() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s14
; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_5_1:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_5_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -10703,14 +9134,14 @@ define void @s_shuffle_v3i64_v4i64__7_5_1() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s14
; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_5_1:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_5_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -10721,126 +9152,120 @@ define void @s_shuffle_v3i64_v4i64__7_5_1() {
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_mov_b32 s8, s14
; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b32 s13, s1
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 5, i32 1>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 5, i32 0>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_6_1() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_6_1:
+define void @s_shuffle_v3i64_v4i64__7_6_0() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_6_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[16:23]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s10, s20
+; GFX900-NEXT: s_mov_b32 s11, s21
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_6_1:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_6_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[16:23]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s10, s20
+; GFX90A-NEXT: s_mov_b32 s11, s21
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_6_1:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_6_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s10, s4
+; GFX942-NEXT: s_mov_b32 s11, s5
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 6, i32 1>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 6, i32 0>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__u_2_2() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__u_2_2:
+define void @s_shuffle_v3i64_v4i64__u_1_1() {
+; GFX9-LABEL: s_shuffle_v3i64_v4i64__u_1_1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; def s[8:15]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:13]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 poison, i32 2, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 poison, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__0_2_2() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__0_2_2:
+define void @s_shuffle_v3i64_v4i64__0_1_1() {
+; GFX9-LABEL: s_shuffle_v3i64_v4i64__0_1_1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; def s[8:15]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:13]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 0, i32 2, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 0, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__1_2_2() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__1_2_2:
+define void @s_shuffle_v3i64_v4i64__1_1_1() {
+; GFX9-LABEL: s_shuffle_v3i64_v4i64__1_1_1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
@@ -10848,41 +9273,41 @@ define void @s_shuffle_v3i64_v4i64__1_2_2() {
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_mov_b32 s8, s10
; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:13]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 1, i32 2, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 1, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__2_2_2() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__2_2_2:
+define void @s_shuffle_v3i64_v4i64__2_1_1() {
+; GFX9-LABEL: s_shuffle_v3i64_v4i64__2_1_1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ; def s[4:11]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: s_mov_b32 s10, s6
+; GFX9-NEXT: s_mov_b32 s11, s7
+; GFX9-NEXT: s_mov_b32 s12, s6
+; GFX9-NEXT: s_mov_b32 s13, s7
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:13]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 2, i32 2, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 2, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__3_2_2() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__3_2_2:
+define void @s_shuffle_v3i64_v4i64__3_1_1() {
+; GFX9-LABEL: s_shuffle_v3i64_v4i64__3_1_1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
@@ -10890,75 +9315,75 @@ define void @s_shuffle_v3i64_v4i64__3_2_2() {
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_mov_b32 s8, s14
; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:13]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 3, i32 2, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 3, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__4_2_2() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__4_2_2:
+define void @s_shuffle_v3i64_v4i64__4_1_1() {
+; GFX9-LABEL: s_shuffle_v3i64_v4i64__4_1_1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; def s[8:15]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:13]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 4, i32 2, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 4, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__5_2_2() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__5_2_2:
+define void @s_shuffle_v3i64_v4i64__5_1_1() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__5_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__5_2_2:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__5_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__5_2_2:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__5_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -10969,171 +9394,118 @@ define void @s_shuffle_v3i64_v4i64__5_2_2() {
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_mov_b32 s8, s2
; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 5, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__6_2_2() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__6_2_2:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__6_2_2:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__6_2_2:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b32 s12, s10
+; GFX942-NEXT: s_mov_b32 s13, s11
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 6, i32 2, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 5, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_2_2() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_2_2:
+define void @s_shuffle_v3i64_v4i64__6_1_1() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__6_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_2_2:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__6_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_2_2:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__6_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s12, s2
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b32 s10, s2
+; GFX942-NEXT: s_mov_b32 s11, s3
+; GFX942-NEXT: s_mov_b32 s13, s3
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 2, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 6, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_u_2() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_u_2:
+define void @s_shuffle_v3i64_v4i64__7_1_1() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_u_2:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_u_2:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -11144,177 +9516,189 @@ define void @s_shuffle_v3i64_v4i64__7_u_2() {
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_mov_b32 s8, s6
; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s12, s10
+; GFX942-NEXT: s_mov_b32 s13, s11
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 poison, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_0_2() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_0_2:
+define void @s_shuffle_v3i64_v4i64__7_u_1() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_u_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_0_2:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_u_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_0_2:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_u_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s12, s2
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b32 s8, s10
+; GFX942-NEXT: s_mov_b32 s9, s11
+; GFX942-NEXT: s_mov_b32 s13, s3
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 0, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 poison, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_1_2() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_1_2:
+define void @s_shuffle_v3i64_v4i64__7_0_1() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_0_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_1_2:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_0_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_1_2:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_0_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s12, s2
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s8, s10
+; GFX942-NEXT: s_mov_b32 s9, s11
+; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b32 s13, s3
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 1, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 0, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_3_2() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_3_2:
+define void @s_shuffle_v3i64_v4i64__7_2_1() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_2_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_3_2:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_2_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_3_2:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_2_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -11323,120 +9707,153 @@ define void @s_shuffle_v3i64_v4i64__7_3_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
+; GFX942-NEXT: s_mov_b32 s8, s14
+; GFX942-NEXT: s_mov_b32 s9, s15
+; GFX942-NEXT: s_mov_b32 s10, s4
+; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: s_mov_b32 s13, s3
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 3, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 2, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_4_2() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_4_2:
+define void @s_shuffle_v3i64_v4i64__7_3_1() {
+; GFX9-LABEL: s_shuffle_v3i64_v4i64__7_3_1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s18
+; GFX9-NEXT: s_mov_b32 s9, s19
+; GFX9-NEXT: s_mov_b32 s12, s6
+; GFX9-NEXT: s_mov_b32 s13, s7
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 3, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__7_4_1() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_4_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_4_2:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_4_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_4_2:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_4_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s12, s2
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b32 s8, s10
+; GFX942-NEXT: s_mov_b32 s9, s11
+; GFX942-NEXT: s_mov_b32 s10, s4
+; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b32 s13, s3
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 4, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 4, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_5_2() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_5_2:
+define void @s_shuffle_v3i64_v4i64__7_5_1() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_5_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s14
; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_5_2:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_5_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s14
; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_5_2:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_5_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -11447,57 +9864,61 @@ define void @s_shuffle_v3i64_v4i64__7_5_2() {
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_mov_b32 s8, s14
; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: s_mov_b32 s13, s3
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 5, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 5, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_6_2() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_6_2:
+define void @s_shuffle_v3i64_v4i64__7_6_1() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_6_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s10, s20
-; GFX900-NEXT: s_mov_b32 s11, s21
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_6_2:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_6_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s10, s20
-; GFX90A-NEXT: s_mov_b32 s11, s21
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_6_2:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_6_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -11506,357 +9927,418 @@ define void @s_shuffle_v3i64_v4i64__7_6_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b32 s8, s14
+; GFX942-NEXT: s_mov_b32 s9, s15
+; GFX942-NEXT: s_mov_b32 s10, s12
+; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: s_mov_b32 s13, s3
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 6, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 6, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__u_3_3() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__u_3_3:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__u_3_3:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__u_3_3:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+define void @s_shuffle_v3i64_v4i64__u_2_2() {
+; GFX9-LABEL: s_shuffle_v3i64_v4i64__u_2_2:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 poison, i32 3, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 poison, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__0_3_3() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__0_3_3:
+define void @s_shuffle_v3i64_v4i64__0_2_2() {
+; GFX9-LABEL: s_shuffle_v3i64_v4i64__0_2_2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; def s[8:15]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:13]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 0, i32 3, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 0, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__1_3_3() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__1_3_3:
+define void @s_shuffle_v3i64_v4i64__1_2_2() {
+; GFX9-LABEL: s_shuffle_v3i64_v4i64__1_2_2:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 1, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__2_2_2() {
+; GFX9-LABEL: s_shuffle_v3i64_v4i64__2_2_2:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s12, s8
+; GFX9-NEXT: s_mov_b32 s13, s9
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 2, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__3_2_2() {
+; GFX9-LABEL: s_shuffle_v3i64_v4i64__3_2_2:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s14
+; GFX9-NEXT: s_mov_b32 s9, s15
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 3, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__4_2_2() {
+; GFX9-LABEL: s_shuffle_v3i64_v4i64__4_2_2:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 4, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__5_2_2() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__5_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__1_3_3:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__5_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__1_3_3:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__5_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_mov_b32 s8, s2
; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b32 s10, s12
+; GFX942-NEXT: s_mov_b32 s11, s13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 1, i32 3, i32 3>
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 5, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__2_3_3() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__2_3_3:
+define void @s_shuffle_v3i64_v4i64__6_2_2() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__6_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__2_3_3:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__6_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__2_3_3:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__6_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s10, s12
+; GFX942-NEXT: s_mov_b32 s11, s13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 2, i32 3, i32 3>
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 6, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__3_3_3() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__3_3_3:
+define void @s_shuffle_v3i64_v4i64__7_2_2() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__3_3_3:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__3_3_3:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_mov_b32 s8, s6
; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b32 s10, s12
+; GFX942-NEXT: s_mov_b32 s11, s13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 3, i32 3, i32 3>
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__4_3_3() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__4_3_3:
+define void @s_shuffle_v3i64_v4i64__7_u_2() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_u_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__4_3_3:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_u_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__4_3_3:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_u_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 4, i32 3, i32 3>
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 poison, i32 2>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__5_3_3() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__5_3_3:
+define void @s_shuffle_v3i64_v4i64__7_0_2() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_0_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__5_3_3:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_0_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__5_3_3:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_0_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -11865,61 +10347,57 @@ define void @s_shuffle_v3i64_v4i64__5_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b32 s8, s14
+; GFX942-NEXT: s_mov_b32 s9, s15
+; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b32 s12, s4
+; GFX942-NEXT: s_mov_b32 s13, s5
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 5, i32 3, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 0, i32 2>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__6_3_3() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__6_3_3:
+define void @s_shuffle_v3i64_v4i64__7_1_2() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_1_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[16:23]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__6_3_3:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_1_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[16:23]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__6_3_3:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_1_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -11928,25 +10406,21 @@ define void @s_shuffle_v3i64_v4i64__6_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 6, i32 3, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 1, i32 2>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_3_3() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_3_3:
+define void @s_shuffle_v3i64_v4i64__7_3_2() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_3_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -11959,14 +10433,12 @@ define void @s_shuffle_v3i64_v4i64__7_3_3() {
; GFX900-NEXT: s_mov_b32 s9, s11
; GFX900-NEXT: s_mov_b32 s10, s14
; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_3_3:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_3_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -11979,77 +10451,73 @@ define void @s_shuffle_v3i64_v4i64__7_3_3() {
; GFX90A-NEXT: s_mov_b32 s9, s11
; GFX90A-NEXT: s_mov_b32 s10, s14
; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_3_3:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_3_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s10, s14
+; GFX942-NEXT: s_mov_b32 s11, s15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 3, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 3, i32 2>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_u_3() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_u_3:
+define void @s_shuffle_v3i64_v4i64__7_4_2() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_4_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s12, s10
-; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_u_3:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_4_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s12, s10
-; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_u_3:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_4_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -12058,63 +10526,59 @@ define void @s_shuffle_v3i64_v4i64__7_u_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b32 s11, s1
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 poison, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 4, i32 2>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_0_3() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_0_3:
+define void @s_shuffle_v3i64_v4i64__7_5_2() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_5_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s18
-; GFX900-NEXT: s_mov_b32 s13, s19
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_0_3:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_5_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s18
-; GFX90A-NEXT: s_mov_b32 s13, s19
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_0_3:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_5_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -12125,23 +10589,21 @@ define void @s_shuffle_v3i64_v4i64__7_0_3() {
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_mov_b32 s8, s14
; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b32 s12, s4
+; GFX942-NEXT: s_mov_b32 s13, s5
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 0, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 5, i32 2>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_1_3() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_1_3:
+define void @s_shuffle_v3i64_v4i64__7_6_2() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_6_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -12152,14 +10614,14 @@ define void @s_shuffle_v3i64_v4i64__7_1_3() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s22
; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s10, s20
+; GFX900-NEXT: s_mov_b32 s11, s21
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_1_3:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_6_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -12170,14 +10632,14 @@ define void @s_shuffle_v3i64_v4i64__7_1_3() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s22
; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s10, s20
+; GFX90A-NEXT: s_mov_b32 s11, s21
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_1_3:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_6_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -12188,88 +10650,202 @@ define void @s_shuffle_v3i64_v4i64__7_1_3() {
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_mov_b32 s8, s6
; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b32 s10, s4
+; GFX942-NEXT: s_mov_b32 s11, s5
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 1, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 6, i32 2>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_2_3() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_2_3:
+define void @s_shuffle_v3i64_v4i64__u_3_3() {
+; GFX9-LABEL: s_shuffle_v3i64_v4i64__u_3_3:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 poison, i32 3, i32 3>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__0_3_3() {
+; GFX9-LABEL: s_shuffle_v3i64_v4i64__0_3_3:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s14
+; GFX9-NEXT: s_mov_b32 s11, s15
+; GFX9-NEXT: s_mov_b32 s12, s14
+; GFX9-NEXT: s_mov_b32 s13, s15
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 0, i32 3, i32 3>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__1_3_3() {
+; GFX9-LABEL: s_shuffle_v3i64_v4i64__1_3_3:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s6
+; GFX9-NEXT: s_mov_b32 s9, s7
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 1, i32 3, i32 3>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__2_3_3() {
+; GFX9-LABEL: s_shuffle_v3i64_v4i64__2_3_3:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 2, i32 3, i32 3>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__3_3_3() {
+; GFX9-LABEL: s_shuffle_v3i64_v4i64__3_3_3:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 3, i32 3, i32 3>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__4_3_3() {
+; GFX9-LABEL: s_shuffle_v3i64_v4i64__4_3_3:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 4, i32 3, i32 3>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__5_3_3() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__5_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_2_3:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__5_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_2_3:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__5_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s12, s10
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b32 s8, s2
+; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b32 s13, s11
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 2, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 5, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_4_3() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_4_3:
+define void @s_shuffle_v3i64_v4i64__6_3_3() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__6_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -12278,10 +10854,8 @@ define void @s_shuffle_v3i64_v4i64__7_4_3() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
; GFX900-NEXT: s_mov_b32 s12, s14
; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
@@ -12289,7 +10863,7 @@ define void @s_shuffle_v3i64_v4i64__7_4_3() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_4_3:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__6_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -12298,10 +10872,8 @@ define void @s_shuffle_v3i64_v4i64__7_4_3() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
; GFX90A-NEXT: s_mov_b32 s12, s14
; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
@@ -12309,135 +10881,129 @@ define void @s_shuffle_v3i64_v4i64__7_4_3() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_4_3:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__6_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s12, s14
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b32 s10, s14
+; GFX942-NEXT: s_mov_b32 s11, s15
+; GFX942-NEXT: s_mov_b32 s13, s15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 4, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 6, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_5_3() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_5_3:
+define void @s_shuffle_v3i64_v4i64__7_3_3() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s12, s18
-; GFX900-NEXT: s_mov_b32 s13, s19
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_5_3:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s12, s18
-; GFX90A-NEXT: s_mov_b32 s13, s19
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_5_3:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s12, s10
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s13, s11
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 5, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_6_3() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_6_3:
+define void @s_shuffle_v3i64_v4i64__7_u_3() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_u_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s10, s20
-; GFX900-NEXT: s_mov_b32 s11, s21
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_6_3:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_u_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s10, s20
-; GFX90A-NEXT: s_mov_b32 s11, s21
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_6_3:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_u_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -12448,8 +11014,6 @@ define void @s_shuffle_v3i64_v4i64__7_6_3() {
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_mov_b32 s8, s14
; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
; GFX942-NEXT: s_mov_b32 s12, s6
; GFX942-NEXT: s_mov_b32 s13, s7
; GFX942-NEXT: ;;#ASMSTART
@@ -12458,620 +11022,687 @@ define void @s_shuffle_v3i64_v4i64__7_6_3() {
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 6, i32 3>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__u_4_4() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__u_4_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 poison, i32 4, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 poison, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__0_4_4() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__0_4_4:
+define void @s_shuffle_v3i64_v4i64__7_0_3() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_0_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__0_4_4:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__0_4_4:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 0, i32 4, i32 4>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__1_4_4() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__1_4_4:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s18
+; GFX900-NEXT: s_mov_b32 s13, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__1_4_4:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_0_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s18
+; GFX90A-NEXT: s_mov_b32 s13, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__1_4_4:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_0_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b32 s8, s14
+; GFX942-NEXT: s_mov_b32 s9, s15
+; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b32 s12, s6
+; GFX942-NEXT: s_mov_b32 s13, s7
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 1, i32 4, i32 4>
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 0, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__2_4_4() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__2_4_4:
+define void @s_shuffle_v3i64_v4i64__7_1_3() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_1_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__2_4_4:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_1_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[16:23]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__2_4_4:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_1_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s12, s14
+; GFX942-NEXT: s_mov_b32 s13, s15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 2, i32 4, i32 4>
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 1, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__3_4_4() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__3_4_4:
+define void @s_shuffle_v3i64_v4i64__7_2_3() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_2_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__3_4_4:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_2_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__3_4_4:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_2_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s8, s14
+; GFX942-NEXT: s_mov_b32 s9, s15
+; GFX942-NEXT: s_mov_b32 s10, s4
+; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b32 s12, s6
+; GFX942-NEXT: s_mov_b32 s13, s7
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 3, i32 4, i32 4>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__4_4_4() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__4_4_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 4, i32 4, i32 4>
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 2, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__5_4_4() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__5_4_4:
+define void @s_shuffle_v3i64_v4i64__7_4_3() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_4_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
; GFX900-NEXT: s_mov_b32 s10, s4
; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__5_4_4:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_4_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
; GFX90A-NEXT: s_mov_b32 s10, s4
; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__5_4_4:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_4_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b32 s8, s18
+; GFX942-NEXT: s_mov_b32 s9, s19
+; GFX942-NEXT: s_mov_b32 s10, s12
+; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b32 s12, s6
+; GFX942-NEXT: s_mov_b32 s13, s7
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 5, i32 4, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 4, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__6_4_4() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__6_4_4:
+define void @s_shuffle_v3i64_v4i64__7_5_3() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_5_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s18
+; GFX900-NEXT: s_mov_b32 s13, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__6_4_4:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_5_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s18
+; GFX90A-NEXT: s_mov_b32 s13, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__6_4_4:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_5_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b32 s8, s14
+; GFX942-NEXT: s_mov_b32 s9, s15
+; GFX942-NEXT: s_mov_b32 s12, s6
+; GFX942-NEXT: s_mov_b32 s13, s7
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 6, i32 4, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 5, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_4_4() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_4_4:
+define void @s_shuffle_v3i64_v4i64__7_6_3() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_6_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s10, s20
+; GFX900-NEXT: s_mov_b32 s11, s21
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_4_4:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_6_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s10, s20
+; GFX90A-NEXT: s_mov_b32 s11, s21
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_4_4:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_6_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b32 s8, s14
+; GFX942-NEXT: s_mov_b32 s9, s15
+; GFX942-NEXT: s_mov_b32 s10, s12
+; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b32 s12, s6
+; GFX942-NEXT: s_mov_b32 s13, s7
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 4, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 6, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_u_4() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_u_4:
+define void @s_shuffle_v3i64_v4i64__u_4_4() {
+; GFX9-LABEL: s_shuffle_v3i64_v4i64__u_4_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 poison, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__0_4_4() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__0_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_u_4:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__0_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_u_4:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__0_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 poison, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 0, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_0_4() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_0_4:
+define void @s_shuffle_v3i64_v4i64__1_4_4() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__1_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_0_4:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__1_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_0_4:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__1_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b32 s8, s2
+; GFX942-NEXT: s_mov_b32 s9, s3
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 0, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 1, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_1_4() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_1_4:
+define void @s_shuffle_v3i64_v4i64__2_4_4() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__2_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_1_4:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__2_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_1_4:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__2_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 1, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 2, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_2_4() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_2_4:
+define void @s_shuffle_v3i64_v4i64__3_4_4() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__3_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_2_4:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__3_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_2_4:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__3_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:19]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 3, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__4_4_4() {
+; GFX9-LABEL: s_shuffle_v3i64_v4i64__4_4_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 4, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__5_4_4() {
+; GFX9-LABEL: s_shuffle_v3i64_v4i64__5_4_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s14
+; GFX9-NEXT: s_mov_b32 s9, s15
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 2, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 5, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_3_4() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_3_4:
+define void @s_shuffle_v3i64_v4i64__6_4_4() {
+; GFX9-LABEL: s_shuffle_v3i64_v4i64__6_4_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s4
+; GFX9-NEXT: s_mov_b32 s11, s5
+; GFX9-NEXT: s_mov_b32 s12, s4
+; GFX9-NEXT: s_mov_b32 s13, s5
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 6, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__7_4_4() {
+; GFX9-LABEL: s_shuffle_v3i64_v4i64__7_4_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s18
+; GFX9-NEXT: s_mov_b32 s9, s19
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__7_u_4() {
+; GFX9-LABEL: s_shuffle_v3i64_v4i64__7_u_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s18
+; GFX9-NEXT: s_mov_b32 s9, s19
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 poison, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__7_0_4() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_0_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -13082,12 +11713,14 @@ define void @s_shuffle_v3i64_v4i64__7_3_4() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s18
; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_3_4:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_0_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -13098,12 +11731,14 @@ define void @s_shuffle_v3i64_v4i64__7_3_4() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s18
; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_3_4:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_0_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -13114,124 +11749,196 @@ define void @s_shuffle_v3i64_v4i64__7_3_4() {
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_mov_b32 s8, s18
; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b32 s11, s1
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 3, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 0, i32 4>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_5_4() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_5_4:
+define void @s_shuffle_v3i64_v4i64__7_1_4() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_1_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s6
-; GFX900-NEXT: s_mov_b32 s11, s7
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_5_4:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_1_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s6
-; GFX90A-NEXT: s_mov_b32 s11, s7
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_5_4:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_1_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s8, s18
+; GFX942-NEXT: s_mov_b32 s9, s19
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 5, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 1, i32 4>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_6_4() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_6_4:
+define void @s_shuffle_v3i64_v4i64__7_2_4() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_2_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s18
; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
+; GFX900-NEXT: s_mov_b32 s10, s20
+; GFX900-NEXT: s_mov_b32 s11, s21
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_6_4:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_2_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s10, s20
+; GFX90A-NEXT: s_mov_b32 s11, s21
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_6_4:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_2_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s8, s18
+; GFX942-NEXT: s_mov_b32 s9, s19
; GFX942-NEXT: s_mov_b32 s10, s4
; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 2, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__7_3_4() {
+; GFX9-LABEL: s_shuffle_v3i64_v4i64__7_3_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s18
+; GFX9-NEXT: s_mov_b32 s9, s19
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 3, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__7_5_4() {
+; GFX9-LABEL: s_shuffle_v3i64_v4i64__7_5_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s18
+; GFX9-NEXT: s_mov_b32 s9, s19
+; GFX9-NEXT: s_mov_b32 s10, s14
+; GFX9-NEXT: s_mov_b32 s11, s15
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 5, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__7_6_4() {
+; GFX9-LABEL: s_shuffle_v3i64_v4i64__7_6_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s18
+; GFX9-NEXT: s_mov_b32 s9, s19
+; GFX9-NEXT: s_mov_b32 s10, s16
+; GFX9-NEXT: s_mov_b32 s11, s17
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 6, i32 4>
@@ -13386,15 +12093,15 @@ define void @s_shuffle_v3i64_v4i64__2_5_5() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s12, s10
-; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -13404,15 +12111,15 @@ define void @s_shuffle_v3i64_v4i64__2_5_5() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s12, s10
-; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -13422,15 +12129,16 @@ define void @s_shuffle_v3i64_v4i64__2_5_5() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b32 s10, s2
+; GFX942-NEXT: s_mov_b32 s11, s3
+; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: s_mov_b32 s13, s3
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -13550,12 +12258,12 @@ define void @s_shuffle_v3i64_v4i64__6_5_5() {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ; def s[4:11]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: s_mov_b32 s10, s6
+; GFX9-NEXT: s_mov_b32 s11, s7
+; GFX9-NEXT: s_mov_b32 s12, s6
+; GFX9-NEXT: s_mov_b32 s13, s7
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:13]
; GFX9-NEXT: ;;#ASMEND
@@ -13878,17 +12586,16 @@ define void @s_shuffle_v3i64_v4i64__7_3_5() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: s_mov_b32 s13, s3
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -14163,15 +12870,15 @@ define void @s_shuffle_v3i64_v4i64__2_6_6() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -14181,15 +12888,15 @@ define void @s_shuffle_v3i64_v4i64__2_6_6() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -14199,15 +12906,16 @@ define void @s_shuffle_v3i64_v4i64__2_6_6() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b32 s10, s4
+; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b32 s12, s4
+; GFX942-NEXT: s_mov_b32 s13, s5
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -14327,12 +13035,12 @@ define void @s_shuffle_v3i64_v4i64__6_6_6() {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ; def s[4:11]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s12, s8
+; GFX9-NEXT: s_mov_b32 s13, s9
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:13]
; GFX9-NEXT: ;;#ASMEND
@@ -14574,15 +13282,15 @@ define void @s_shuffle_v3i64_v4i64__7_3_6() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s18
-; GFX900-NEXT: s_mov_b32 s11, s19
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -14592,15 +13300,15 @@ define void @s_shuffle_v3i64_v4i64__7_3_6() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s18
-; GFX90A-NEXT: s_mov_b32 s11, s19
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -14610,15 +13318,16 @@ define void @s_shuffle_v3i64_v4i64__7_3_6() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s12, s4
+; GFX942-NEXT: s_mov_b32 s13, s5
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -14671,88 +13380,56 @@ define void @s_shuffle_v3i64_v4i64__7_4_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 4, i32 6>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__7_5_6() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__7_5_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 5, i32 6>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__u_7_7() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__u_7_7:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__u_7_7:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__u_7_7:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b32 s12, s4
+; GFX942-NEXT: s_mov_b32 s13, s5
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 4, i32 6>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__7_5_6() {
+; GFX9-LABEL: s_shuffle_v3i64_v4i64__7_5_6:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s14
+; GFX9-NEXT: s_mov_b32 s9, s15
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 5, i32 6>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__u_7_7() {
+; GFX9-LABEL: s_shuffle_v3i64_v4i64__u_7_7:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 poison, i32 7, i32 7>
@@ -14826,17 +13503,15 @@ define void @s_shuffle_v3i64_v4i64__1_7_7() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -14846,17 +13521,15 @@ define void @s_shuffle_v3i64_v4i64__1_7_7() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -14866,17 +13539,16 @@ define void @s_shuffle_v3i64_v4i64__1_7_7() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_mov_b32 s8, s2
; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b32 s12, s10
+; GFX942-NEXT: s_mov_b32 s13, s11
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -14929,17 +13601,16 @@ define void @s_shuffle_v3i64_v4i64__2_7_7() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b32 s10, s6
+; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b32 s12, s6
+; GFX942-NEXT: s_mov_b32 s13, s7
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -14956,17 +13627,15 @@ define void @s_shuffle_v3i64_v4i64__3_7_7() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s18
-; GFX900-NEXT: s_mov_b32 s11, s19
-; GFX900-NEXT: s_mov_b32 s12, s18
-; GFX900-NEXT: s_mov_b32 s13, s19
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -14976,17 +13645,15 @@ define void @s_shuffle_v3i64_v4i64__3_7_7() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s18
-; GFX90A-NEXT: s_mov_b32 s11, s19
-; GFX90A-NEXT: s_mov_b32 s12, s18
-; GFX90A-NEXT: s_mov_b32 s13, s19
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -14998,15 +13665,14 @@ define void @s_shuffle_v3i64_v4i64__3_7_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b32 s8, s14
+; GFX942-NEXT: s_mov_b32 s9, s15
+; GFX942-NEXT: s_mov_b32 s12, s10
+; GFX942-NEXT: s_mov_b32 s13, s11
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -15041,56 +13707,20 @@ define void @s_shuffle_v3i64_v4i64__4_7_7() {
}
define void @s_shuffle_v3i64_v4i64__5_7_7() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__5_7_7:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__5_7_7:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__5_7_7:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v3i64_v4i64__5_7_7:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s6
+; GFX9-NEXT: s_mov_b32 s9, s7
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 5, i32 7, i32 7>
@@ -15099,56 +13729,18 @@ define void @s_shuffle_v3i64_v4i64__5_7_7() {
}
define void @s_shuffle_v3i64_v4i64__6_7_7() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__6_7_7:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__6_7_7:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__6_7_7:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v3i64_v4i64__6_7_7:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 6, i32 7, i32 7>
@@ -15444,17 +14036,16 @@ define void @s_shuffle_v3i64_v4i64__7_3_7() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s12, s6
+; GFX942-NEXT: s_mov_b32 s13, s7
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v2p0.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v2p0.ll
index 8757639c501d2..ff8ddd031858f 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v2p0.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v2p0.ll
@@ -58,39 +58,33 @@ define void @v_shuffle_v3p0_v2p0__1_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v2p0__1_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v2p0__1_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v2p0__1_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -114,39 +108,33 @@ define void @v_shuffle_v3p0_v2p0__3_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v2p0__3_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v2p0__3_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v2p0__3_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -160,55 +148,42 @@ define void @v_shuffle_v3p0_v2p0__3_0_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v2p0__3_0_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v2p0__3_0_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v2p0__3_0_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -222,49 +197,43 @@ define void @v_shuffle_v3p0_v2p0__3_1_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v2p0__3_1_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v2p0__3_1_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v2p0__3_1_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -291,31 +260,27 @@ define void @v_shuffle_v3p0_v2p0__3_2_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-LABEL: v_shuffle_v3p0_v2p0__3_2_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v2p0__3_2_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -329,39 +294,40 @@ define void @v_shuffle_v3p0_v2p0__3_3_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v2p0__3_3_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v2p0__3_3_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v2p0__3_3_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -375,51 +341,51 @@ define void @v_shuffle_v3p0_v2p0__3_3_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v2p0__3_3_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v2p0__3_3_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v2p0__3_3_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -433,51 +399,52 @@ define void @v_shuffle_v3p0_v2p0__3_3_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v2p0__3_3_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v6, v[4:5], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v2p0__3_3_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v6, v[4:5], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v2p0__3_3_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
+; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -491,42 +458,42 @@ define void @v_shuffle_v3p0_v2p0__3_3_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v2p0__3_3_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v2p0__3_3_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v2p0__3_3_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -540,42 +507,42 @@ define void @v_shuffle_v3p0_v2p0__3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v2p0__3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx2 v6, v[2:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v2p0__3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx2 v6, v[2:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v2p0__3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -589,42 +556,36 @@ define void @v_shuffle_v3p0_v2p0__u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v2p0__u_0_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v0, v[2:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v2p0__u_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v0, v[2:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v2p0__u_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v0, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -699,32 +660,28 @@ define void @v_shuffle_v3p0_v2p0__1_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-LABEL: v_shuffle_v3p0_v2p0__1_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v1
-; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v2p0__1_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -737,42 +694,36 @@ define void @v_shuffle_v3p0_v2p0__2_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v2p0__2_0_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v0, v[2:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v2p0__2_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v0, v[2:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v2p0__2_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v0, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -785,57 +736,45 @@ define void @v_shuffle_v3p0_v2p0__3_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v2p0__3_0_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v2p0__3_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v2p0__3_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -849,52 +788,45 @@ define void @v_shuffle_v3p0_v2p0__3_u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v2p0__3_u_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v2p0__3_u_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v2p0__3_u_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -910,15 +842,15 @@ define void @v_shuffle_v3p0_v2p0__3_1_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v4, v6
+; GFX900-NEXT: v_mov_b32_e32 v5, v7
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -927,15 +859,15 @@ define void @v_shuffle_v3p0_v2p0__3_1_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v4, v6
+; GFX90A-NEXT: v_mov_b32_e32 v5, v7
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -944,15 +876,15 @@ define void @v_shuffle_v3p0_v2p0__3_1_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v4, v6
+; GFX942-NEXT: v_mov_b32_e32 v5, v7
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -966,52 +898,51 @@ define void @v_shuffle_v3p0_v2p0__3_2_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v2p0__3_2_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v2p0__3_2_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v2p0__3_2_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -1109,42 +1040,42 @@ define void @v_shuffle_v3p0_v2p0__1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v2p0__1_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx2 v6, v[2:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v2p0__1_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx2 v6, v[2:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v2p0__1_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -1200,16 +1131,14 @@ define void @v_shuffle_v3p0_v2p0__3_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v6, v[4:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1217,16 +1146,14 @@ define void @v_shuffle_v3p0_v2p0__3_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v6, v[4:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1234,16 +1161,14 @@ define void @v_shuffle_v3p0_v2p0__3_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -1258,16 +1183,14 @@ define void @v_shuffle_v3p0_v2p0__3_u_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v6, v[4:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1275,16 +1198,14 @@ define void @v_shuffle_v3p0_v2p0__3_u_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v6, v[4:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1292,16 +1213,14 @@ define void @v_shuffle_v3p0_v2p0__3_u_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -1317,16 +1236,12 @@ define void @v_shuffle_v3p0_v2p0__3_0_1(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx2 v8, v[6:7], s[16:17] offset:16
; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
@@ -1336,16 +1251,12 @@ define void @v_shuffle_v3p0_v2p0__3_0_1(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx2 v8, v[6:7], s[16:17] offset:16
; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
@@ -1355,16 +1266,12 @@ define void @v_shuffle_v3p0_v2p0__3_0_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1379,57 +1286,52 @@ define void @v_shuffle_v3p0_v2p0__3_2_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v2p0__3_2_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v6, v[4:5], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v2p0__3_2_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v6, v[4:5], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v2p0__3_2_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -1493,39 +1395,33 @@ define void @v_shuffle_v3p0_v2p0__1_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v2p0__1_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v2p0__1_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v2p0__1_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -1563,32 +1459,28 @@ define void @v_shuffle_v3p0_v2p0__3_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A-LABEL: v_shuffle_v3p0_v2p0__3_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v1
-; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v2p0__3_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -1607,9 +1499,7 @@ define void @v_shuffle_v3p0_v2p0__3_u_2(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1621,9 +1511,7 @@ define void @v_shuffle_v3p0_v2p0__3_u_2(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: global_store_dwordx2 v4, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1635,9 +1523,7 @@ define void @v_shuffle_v3p0_v2p0__3_u_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -1651,57 +1537,45 @@ define void @v_shuffle_v3p0_v2p0__3_0_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v2p0__3_0_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v6, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v2p0__3_0_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v6, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v2p0__3_0_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -1716,16 +1590,14 @@ define void @v_shuffle_v3p0_v2p0__3_1_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1733,16 +1605,14 @@ define void @v_shuffle_v3p0_v2p0__3_1_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1750,16 +1620,14 @@ define void @v_shuffle_v3p0_v2p0__3_1_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -1878,13 +1746,13 @@ define void @v_shuffle_v3p0_v2p0__1_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v6
+; GFX900-NEXT: v_mov_b32_e32 v5, v7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v2
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
; GFX900-NEXT: global_store_dwordx2 v8, v[6:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1895,13 +1763,13 @@ define void @v_shuffle_v3p0_v2p0__1_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v6
+; GFX90A-NEXT: v_mov_b32_e32 v5, v7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v3
; GFX90A-NEXT: global_store_dwordx2 v8, v[6:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1912,13 +1780,13 @@ define void @v_shuffle_v3p0_v2p0__1_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v6
+; GFX942-NEXT: v_mov_b32_e32 v5, v7
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v3
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -1975,42 +1843,36 @@ define void @v_shuffle_v3p0_v2p0__3_u_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v2p0__3_u_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
; GFX900-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v2p0__3_u_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v2p0__3_u_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -2024,57 +1886,45 @@ define void @v_shuffle_v3p0_v2p0__3_0_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v2p0__3_0_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v6, v[4:5], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v2p0__3_0_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v6, v[4:5], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v2p0__3_0_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -2089,16 +1939,14 @@ define void @v_shuffle_v3p0_v2p0__3_1_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[6:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v6, v[2:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2106,16 +1954,14 @@ define void @v_shuffle_v3p0_v2p0__3_1_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx2 v8, v[6:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v6, v[2:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2123,16 +1969,14 @@ define void @v_shuffle_v3p0_v2p0__3_1_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -2160,32 +2004,28 @@ define void @v_shuffle_v3p0_v2p0__3_2_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-LABEL: v_shuffle_v3p0_v2p0__3_2_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v1
-; GFX90A-NEXT: global_store_dwordx2 v4, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx2 v6, v[2:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v2p0__3_2_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -2543,59 +2383,21 @@ define void @s_shuffle_v3p0_v2p0__3_3_u() {
}
define void @s_shuffle_v3p0_v2p0__3_3_0() {
-; GFX900-LABEL: s_shuffle_v3p0_v2p0__3_3_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:7]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3p0_v2p0__3_3_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:7]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3p0_v2p0__3_3_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:3]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v3p0_v2p0__3_3_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[8:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%vec1 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <3 x i32> <i32 3, i32 3, i32 0>
@@ -2665,56 +2467,20 @@ define void @s_shuffle_v3p0_v2p0__3_3_1() {
}
define void @s_shuffle_v3p0_v2p0__3_3_2() {
-; GFX900-LABEL: s_shuffle_v3p0_v2p0__3_3_2:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:7]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s10, s6
-; GFX900-NEXT: s_mov_b32 s11, s7
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3p0_v2p0__3_3_2:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:7]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s10, s6
-; GFX90A-NEXT: s_mov_b32 s11, s7
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3p0_v2p0__3_3_2:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:3]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v3p0_v2p0__3_3_2:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s14
+; GFX9-NEXT: s_mov_b32 s9, s15
+; GFX9-NEXT: s_mov_b32 s10, s14
+; GFX9-NEXT: s_mov_b32 s11, s15
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%vec1 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <3 x i32> <i32 3, i32 3, i32 2>
@@ -2745,50 +2511,18 @@ define void @s_shuffle_v3p0_v2p0__3_3_3() {
}
define void @s_shuffle_v3p0_v2p0__u_0_0() {
-; GFX900-LABEL: s_shuffle_v3p0_v2p0__u_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:7]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3p0_v2p0__u_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:7]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3p0_v2p0__u_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:3]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v3p0_v2p0__u_0_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <3 x i32> <i32 poison, i32 0, i32 0>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
@@ -2817,56 +2551,20 @@ define void @s_shuffle_v3p0_v2p0__0_0_0() {
}
define void @s_shuffle_v3p0_v2p0__1_0_0() {
-; GFX900-LABEL: s_shuffle_v3p0_v2p0__1_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:7]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3p0_v2p0__1_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:7]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3p0_v2p0__1_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:3]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v3p0_v2p0__1_0_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s14
+; GFX9-NEXT: s_mov_b32 s9, s15
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <3 x i32> <i32 1, i32 0, i32 0>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
@@ -2874,50 +2572,18 @@ define void @s_shuffle_v3p0_v2p0__1_0_0() {
}
define void @s_shuffle_v3p0_v2p0__2_0_0() {
-; GFX900-LABEL: s_shuffle_v3p0_v2p0__2_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:7]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3p0_v2p0__2_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:7]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3p0_v2p0__2_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:3]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v3p0_v2p0__2_0_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <3 x i32> <i32 2, i32 0, i32 0>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
@@ -2929,17 +2595,15 @@ define void @s_shuffle_v3p0_v2p0__3_0_0() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ; def s[12:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -2949,17 +2613,15 @@ define void @s_shuffle_v3p0_v2p0__3_0_0() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ; def s[12:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -2969,17 +2631,15 @@ define void @s_shuffle_v3p0_v2p0__3_0_0() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:3]
+; GFX942-NEXT: ; def s[12:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:7]
+; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b32 s8, s2
+; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b32 s10, s12
+; GFX942-NEXT: s_mov_b32 s11, s13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -2996,15 +2656,13 @@ define void @s_shuffle_v3p0_v2p0__3_u_0() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ; def s[12:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -3014,15 +2672,13 @@ define void @s_shuffle_v3p0_v2p0__3_u_0() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ; def s[12:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -3032,15 +2688,13 @@ define void @s_shuffle_v3p0_v2p0__3_u_0() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:3]
+; GFX942-NEXT: ; def s[12:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:7]
+; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b32 s8, s2
+; GFX942-NEXT: s_mov_b32 s9, s3
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -3057,17 +2711,15 @@ define void @s_shuffle_v3p0_v2p0__3_1_0() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ; def s[12:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s6
-; GFX900-NEXT: s_mov_b32 s11, s7
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -3077,17 +2729,15 @@ define void @s_shuffle_v3p0_v2p0__3_1_0() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ; def s[12:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s6
-; GFX90A-NEXT: s_mov_b32 s11, s7
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -3097,17 +2747,15 @@ define void @s_shuffle_v3p0_v2p0__3_1_0() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:3]
+; GFX942-NEXT: ; def s[12:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:7]
+; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b32 s8, s2
+; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b32 s10, s14
+; GFX942-NEXT: s_mov_b32 s11, s15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -3129,12 +2777,10 @@ define void @s_shuffle_v3p0_v2p0__3_2_0() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -3149,12 +2795,10 @@ define void @s_shuffle_v3p0_v2p0__3_2_0() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -3164,17 +2808,15 @@ define void @s_shuffle_v3p0_v2p0__3_2_0() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:3]
+; GFX942-NEXT: ; def s[12:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:7]
+; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b32 s8, s2
+; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b32 s11, s1
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -3634,56 +3276,20 @@ define void @s_shuffle_v3p0_v2p0__2_2_2() {
}
define void @s_shuffle_v3p0_v2p0__3_2_2() {
-; GFX900-LABEL: s_shuffle_v3p0_v2p0__3_2_2:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:7]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3p0_v2p0__3_2_2:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:7]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3p0_v2p0__3_2_2:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:3]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v3p0_v2p0__3_2_2:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s14
+; GFX9-NEXT: s_mov_b32 s9, s15
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%vec1 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <3 x i32> <i32 3, i32 2, i32 2>
@@ -3692,50 +3298,18 @@ define void @s_shuffle_v3p0_v2p0__3_2_2() {
}
define void @s_shuffle_v3p0_v2p0__3_u_2() {
-; GFX900-LABEL: s_shuffle_v3p0_v2p0__3_u_2:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:7]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3p0_v2p0__3_u_2:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:7]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3p0_v2p0__3_u_2:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:3]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v3p0_v2p0__3_u_2:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s14
+; GFX9-NEXT: s_mov_b32 s9, s15
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%vec1 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <3 x i32> <i32 3, i32 poison, i32 2>
@@ -3787,14 +3361,12 @@ define void @s_shuffle_v3p0_v2p0__3_0_2() {
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:7]
+; GFX942-NEXT: ; def s[12:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s8, s14
+; GFX942-NEXT: s_mov_b32 s9, s15
; GFX942-NEXT: s_mov_b32 s10, s0
; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -3807,59 +3379,21 @@ define void @s_shuffle_v3p0_v2p0__3_0_2() {
}
define void @s_shuffle_v3p0_v2p0__3_1_2() {
-; GFX900-LABEL: s_shuffle_v3p0_v2p0__3_1_2:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:7]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3p0_v2p0__3_1_2:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:7]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3p0_v2p0__3_1_2:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:3]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v3p0_v2p0__3_1_2:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[8:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s14
+; GFX9-NEXT: s_mov_b32 s9, s15
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%vec1 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <3 x i32> <i32 3, i32 1, i32 2>
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v3p0.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v3p0.ll
index b6f4e3091b61f..8e0c74dedb69c 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v3p0.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v3p0.ll
@@ -100,39 +100,33 @@ define void @v_shuffle_v3p0_v3p0__2_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v3p0__2_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v3p0__2_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v3p0__2_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -196,39 +190,33 @@ define void @v_shuffle_v3p0_v3p0__5_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v3p0__5_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v3p0__5_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -242,55 +230,42 @@ define void @v_shuffle_v3p0_v3p0__5_0_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v3p0__5_0_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:7]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v3p0__5_0_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:7]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_0_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:7]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -304,49 +279,43 @@ define void @v_shuffle_v3p0_v3p0__5_1_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v3p0__5_1_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
-; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v8
-; GFX900-NEXT: v_mov_b32_e32 v1, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v3p0__5_1_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
-; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v8
-; GFX90A-NEXT: v_mov_b32_e32 v1, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_1_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
-; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v8
-; GFX942-NEXT: v_mov_b32_e32 v1, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -360,49 +329,43 @@ define void @v_shuffle_v3p0_v3p0__5_2_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v3p0__5_2_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[2:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:11]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, v10
-; GFX900-NEXT: v_mov_b32_e32 v3, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v3p0__5_2_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[2:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:11]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, v10
-; GFX90A-NEXT: v_mov_b32_e32 v3, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_2_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:11]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v10
-; GFX942-NEXT: v_mov_b32_e32 v3, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -416,45 +379,40 @@ define void @v_shuffle_v3p0_v3p0__5_3_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v3p0__5_3_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: v_mov_b32_e32 v7, v1
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v3p0__5_3_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v0
+; GFX90A-NEXT: v_mov_b32_e32 v7, v1
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_3_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v0
+; GFX942-NEXT: v_mov_b32_e32 v7, v1
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -468,39 +426,40 @@ define void @v_shuffle_v3p0_v3p0__5_4_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v3p0__5_4_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: v_mov_b32_e32 v7, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v3p0__5_4_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v2
+; GFX90A-NEXT: v_mov_b32_e32 v7, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_4_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v2
+; GFX942-NEXT: v_mov_b32_e32 v7, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -514,39 +473,40 @@ define void @v_shuffle_v3p0_v3p0__5_5_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v3p0__5_5_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v3p0__5_5_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_5_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -560,51 +520,51 @@ define void @v_shuffle_v3p0_v3p0__5_5_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v3p0__5_5_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:7]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v12, v[6:7], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v3p0__5_5_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:7]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v12, v[6:7], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_5_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:7]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v12, v[6:7], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -618,51 +578,52 @@ define void @v_shuffle_v3p0_v3p0__5_5_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v3p0__5_5_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:9]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v8
-; GFX900-NEXT: v_mov_b32_e32 v7, v9
-; GFX900-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v10, v[6:7], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v3p0__5_5_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:9]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v8
-; GFX90A-NEXT: v_mov_b32_e32 v7, v9
-; GFX90A-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v10, v[6:7], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_5_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
-; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v6, v8
-; GFX942-NEXT: v_mov_b32_e32 v7, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v10, v[6:7], s[0:1] offset:16
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -676,51 +637,52 @@ define void @v_shuffle_v3p0_v3p0__5_5_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v3p0__5_5_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:11]
+; GFX900-NEXT: ; def v[2:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v8, v10
-; GFX900-NEXT: v_mov_b32_e32 v9, v11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v12, v[4:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v8, v[6:7], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v3p0__5_5_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:11]
+; GFX90A-NEXT: ; def v[2:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v10
-; GFX90A-NEXT: v_mov_b32_e32 v9, v11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v12, v[4:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v8, v[6:7], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_5_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:11]
+; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v10
-; GFX942-NEXT: v_mov_b32_e32 v9, v11
+; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v12, v[4:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -734,42 +696,42 @@ define void @v_shuffle_v3p0_v3p0__5_5_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v3p0__5_5_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v3p0__5_5_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_5_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -783,42 +745,42 @@ define void @v_shuffle_v3p0_v3p0__5_5_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v3p0__5_5_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v6, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v3p0__5_5_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v6, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_5_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -832,42 +794,42 @@ define void @v_shuffle_v3p0_v3p0__5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v3p0__5_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx2 v6, v[4:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v3p0__5_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx2 v6, v[4:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -881,42 +843,36 @@ define void @v_shuffle_v3p0_v3p0__u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v3p0__u_0_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[2:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v0, v[2:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v3p0__u_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[2:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v0, v[2:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v3p0__u_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v0, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -1025,48 +981,42 @@ define void @v_shuffle_v3p0_v3p0__2_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v3p0__2_0_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: v_mov_b32_e32 v7, v1
+; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v3p0__2_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v0
+; GFX90A-NEXT: v_mov_b32_e32 v7, v1
+; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v3p0__2_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v6, v0
+; GFX942-NEXT: v_mov_b32_e32 v7, v1
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -1079,42 +1029,36 @@ define void @v_shuffle_v3p0_v3p0__3_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v3p0__3_0_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[2:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v0, v[2:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v3p0__3_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[2:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v0, v[2:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v3p0__3_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v0, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -1185,57 +1129,45 @@ define void @v_shuffle_v3p0_v3p0__5_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v3p0__5_0_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:7]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v12, v[6:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v3p0__5_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:7]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v12, v[6:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:7]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v12, v[6:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -1249,52 +1181,45 @@ define void @v_shuffle_v3p0_v3p0__5_u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v3p0__5_u_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:7]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v12, v[6:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v3p0__5_u_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:7]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v12, v[6:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_u_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:7]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v12, v[6:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -1308,52 +1233,51 @@ define void @v_shuffle_v3p0_v3p0__5_1_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v3p0__5_1_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:9]
+; GFX900-NEXT: global_store_dwordx2 v12, v[6:7], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v6, v8
+; GFX900-NEXT: v_mov_b32_e32 v7, v9
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v10, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v0, v8
-; GFX900-NEXT: v_mov_b32_e32 v1, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v3p0__5_1_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: global_store_dwordx2 v12, v[6:7], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v6, v8
+; GFX90A-NEXT: v_mov_b32_e32 v7, v9
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:9]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v10, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v0, v8
-; GFX90A-NEXT: v_mov_b32_e32 v1, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_1_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx2 v12, v[6:7], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v6, v8
+; GFX942-NEXT: v_mov_b32_e32 v7, v9
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:9]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v8
-; GFX942-NEXT: v_mov_b32_e32 v1, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -1367,51 +1291,51 @@ define void @v_shuffle_v3p0_v3p0__5_2_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v3p0__5_2_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
-; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, v10
-; GFX900-NEXT: v_mov_b32_e32 v3, v11
-; GFX900-NEXT: global_store_dwordx2 v12, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v12, v[6:7], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v6, v10
+; GFX900-NEXT: v_mov_b32_e32 v7, v11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v3p0__5_2_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
-; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, v10
-; GFX90A-NEXT: v_mov_b32_e32 v3, v11
-; GFX90A-NEXT: global_store_dwordx2 v12, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v12, v[6:7], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v6, v10
+; GFX90A-NEXT: v_mov_b32_e32 v7, v11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_2_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
-; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v12, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v10
-; GFX942-NEXT: v_mov_b32_e32 v3, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v12, v[6:7], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v6, v10
+; GFX942-NEXT: v_mov_b32_e32 v7, v11
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -1425,52 +1349,51 @@ define void @v_shuffle_v3p0_v3p0__5_3_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v3p0__5_3_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:7]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v12, v[6:7], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: v_mov_b32_e32 v7, v1
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v3p0__5_3_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:7]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v12, v[6:7], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v6, v0
+; GFX90A-NEXT: v_mov_b32_e32 v7, v1
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_3_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:7]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v12, v[6:7], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v6, v0
+; GFX942-NEXT: v_mov_b32_e32 v7, v1
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -1484,51 +1407,51 @@ define void @v_shuffle_v3p0_v3p0__5_4_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v3p0__5_4_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:7]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v12, v[6:7], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: v_mov_b32_e32 v7, v3
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v3p0__5_4_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:7]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v12, v[6:7], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v6, v2
+; GFX90A-NEXT: v_mov_b32_e32 v7, v3
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_4_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:7]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v12, v[6:7], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v6, v2
+; GFX942-NEXT: v_mov_b32_e32 v7, v3
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -1674,42 +1597,42 @@ define void @v_shuffle_v3p0_v3p0__2_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v3p0__2_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx2 v6, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: v_mov_b32_e32 v7, v3
+; GFX900-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v3p0__2_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx2 v6, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v2
+; GFX90A-NEXT: v_mov_b32_e32 v7, v3
+; GFX90A-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v3p0__2_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v6, v2
+; GFX942-NEXT: v_mov_b32_e32 v7, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -1823,16 +1746,14 @@ define void @v_shuffle_v3p0_v3p0__5_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:9]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v8
-; GFX900-NEXT: v_mov_b32_e32 v1, v9
-; GFX900-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v10, v[6:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1840,16 +1761,14 @@ define void @v_shuffle_v3p0_v3p0__5_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:9]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v8
-; GFX90A-NEXT: v_mov_b32_e32 v1, v9
-; GFX90A-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v10, v[6:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1857,16 +1776,14 @@ define void @v_shuffle_v3p0_v3p0__5_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:9]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v0, v8
-; GFX942-NEXT: v_mov_b32_e32 v1, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v10, v[6:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -1881,16 +1798,14 @@ define void @v_shuffle_v3p0_v3p0__5_u_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:9]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v8
-; GFX900-NEXT: v_mov_b32_e32 v1, v9
-; GFX900-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v10, v[6:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1898,16 +1813,14 @@ define void @v_shuffle_v3p0_v3p0__5_u_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:9]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v8
-; GFX90A-NEXT: v_mov_b32_e32 v1, v9
-; GFX90A-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v10, v[6:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1915,16 +1828,14 @@ define void @v_shuffle_v3p0_v3p0__5_u_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:9]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v0, v8
-; GFX942-NEXT: v_mov_b32_e32 v1, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v10, v[6:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -1938,57 +1849,45 @@ define void @v_shuffle_v3p0_v3p0__5_0_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v3p0__5_0_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:9]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v2, v8
-; GFX900-NEXT: v_mov_b32_e32 v3, v9
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v12, v[8:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v3p0__5_0_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:9]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v2, v8
-; GFX90A-NEXT: v_mov_b32_e32 v3, v9
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v12, v[8:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_0_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:9]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v8
-; GFX942-NEXT: v_mov_b32_e32 v3, v9
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v12, v[8:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -2002,51 +1901,51 @@ define void @v_shuffle_v3p0_v3p0__5_2_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v3p0__5_2_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[4:9]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx2 v10, v[6:7], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v6, v8
+; GFX900-NEXT: v_mov_b32_e32 v7, v9
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:11]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v12, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v2, v10
-; GFX900-NEXT: v_mov_b32_e32 v3, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v3p0__5_2_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[4:9]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: global_store_dwordx2 v10, v[6:7], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v6, v8
+; GFX90A-NEXT: v_mov_b32_e32 v7, v9
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:11]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v12, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v2, v10
-; GFX90A-NEXT: v_mov_b32_e32 v3, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_2_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx2 v10, v[6:7], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v6, v8
+; GFX942-NEXT: v_mov_b32_e32 v7, v9
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:11]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v10
-; GFX942-NEXT: v_mov_b32_e32 v3, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -2062,18 +1961,16 @@ define void @v_shuffle_v3p0_v3p0__5_3_1(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:9]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v0, v8
-; GFX900-NEXT: v_mov_b32_e32 v1, v9
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
-; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: global_store_dwordx2 v10, v[6:7], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: v_mov_b32_e32 v7, v1
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v3p0__5_3_1:
@@ -2081,17 +1978,15 @@ define void @v_shuffle_v3p0_v3p0__5_3_1(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:9]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v0, v8
-; GFX90A-NEXT: v_mov_b32_e32 v1, v9
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v10, v[6:7], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v6, v0
+; GFX90A-NEXT: v_mov_b32_e32 v7, v1
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2100,18 +1995,16 @@ define void @v_shuffle_v3p0_v3p0__5_3_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx2 v10, v[6:7], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:9]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v8
-; GFX942-NEXT: v_mov_b32_e32 v1, v9
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, v0
+; GFX942-NEXT: v_mov_b32_e32 v7, v1
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -2125,16 +2018,16 @@ define void @v_shuffle_v3p0_v3p0__5_4_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v3p0__5_4_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:9]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v8
-; GFX900-NEXT: v_mov_b32_e32 v5, v9
-; GFX900-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx2 v10, v[6:7], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: v_mov_b32_e32 v7, v3
; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
@@ -2142,16 +2035,16 @@ define void @v_shuffle_v3p0_v3p0__5_4_1(ptr addrspace(1) inreg %ptr) {
; GFX90A-LABEL: v_shuffle_v3p0_v3p0__5_4_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:9]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v8
-; GFX90A-NEXT: v_mov_b32_e32 v5, v9
-; GFX90A-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx2 v10, v[6:7], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v6, v2
+; GFX90A-NEXT: v_mov_b32_e32 v7, v3
; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
@@ -2159,16 +2052,17 @@ define void @v_shuffle_v3p0_v3p0__5_4_1(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_4_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
-; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v4, v8
-; GFX942-NEXT: v_mov_b32_e32 v5, v9
+; GFX942-NEXT: global_store_dwordx2 v10, v[6:7], s[0:1] offset:16
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v2
+; GFX942-NEXT: v_mov_b32_e32 v7, v3
; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2315,42 +2209,42 @@ define void @v_shuffle_v3p0_v3p0__2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v3p0__2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx2 v6, v[4:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v3p0__2_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx2 v6, v[4:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v3p0__2_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -2464,16 +2358,14 @@ define void @v_shuffle_v3p0_v3p0__5_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[2:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:11]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, v10
-; GFX900-NEXT: v_mov_b32_e32 v3, v11
-; GFX900-NEXT: global_store_dwordx2 v12, v[4:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v8, v[6:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2481,16 +2373,14 @@ define void @v_shuffle_v3p0_v3p0__5_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[2:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:11]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, v10
-; GFX90A-NEXT: v_mov_b32_e32 v3, v11
-; GFX90A-NEXT: global_store_dwordx2 v12, v[4:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v8, v[6:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2498,16 +2388,14 @@ define void @v_shuffle_v3p0_v3p0__5_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:11]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v12, v[4:5], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v10
-; GFX942-NEXT: v_mov_b32_e32 v3, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -2522,16 +2410,14 @@ define void @v_shuffle_v3p0_v3p0__5_u_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[2:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:11]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v10
-; GFX900-NEXT: v_mov_b32_e32 v1, v11
-; GFX900-NEXT: global_store_dwordx2 v12, v[4:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v8, v[6:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2539,16 +2425,14 @@ define void @v_shuffle_v3p0_v3p0__5_u_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[2:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:11]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v10
-; GFX90A-NEXT: v_mov_b32_e32 v1, v11
-; GFX90A-NEXT: global_store_dwordx2 v12, v[4:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v8, v[6:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2556,16 +2440,14 @@ define void @v_shuffle_v3p0_v3p0__5_u_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:11]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v12, v[4:5], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v0, v10
-; GFX942-NEXT: v_mov_b32_e32 v1, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -2581,17 +2463,13 @@ define void @v_shuffle_v3p0_v3p0__5_0_2(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:11]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v12, v[4:5], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v2, v10
-; GFX900-NEXT: v_mov_b32_e32 v3, v11
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v12, v[10:11], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2600,17 +2478,13 @@ define void @v_shuffle_v3p0_v3p0__5_0_2(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:11]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v12, v[4:5], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v2, v10
-; GFX90A-NEXT: v_mov_b32_e32 v3, v11
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v12, v[10:11], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2619,17 +2493,13 @@ define void @v_shuffle_v3p0_v3p0__5_0_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:11]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v12, v[4:5], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v10
-; GFX942-NEXT: v_mov_b32_e32 v3, v11
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v12, v[10:11], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -2643,51 +2513,46 @@ define void @v_shuffle_v3p0_v3p0__5_1_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v3p0__5_1_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:11]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v10
-; GFX900-NEXT: v_mov_b32_e32 v1, v11
-; GFX900-NEXT: global_store_dwordx2 v12, v[4:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v10, v[8:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v3p0__5_1_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:11]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v10
-; GFX90A-NEXT: v_mov_b32_e32 v1, v11
-; GFX90A-NEXT: global_store_dwordx2 v12, v[4:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v10, v[8:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_1_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:11]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v12, v[4:5], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v0, v10
-; GFX942-NEXT: v_mov_b32_e32 v1, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v10, v[8:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -2701,57 +2566,52 @@ define void @v_shuffle_v3p0_v3p0__5_3_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v3p0__5_3_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[2:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:11]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v10
-; GFX900-NEXT: v_mov_b32_e32 v1, v11
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: global_store_dwordx2 v12, v[4:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v8, v[6:7], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: v_mov_b32_e32 v7, v1
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v3p0__5_3_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[2:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:11]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v10
-; GFX90A-NEXT: v_mov_b32_e32 v1, v11
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: global_store_dwordx2 v12, v[4:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v8, v[6:7], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v6, v0
+; GFX90A-NEXT: v_mov_b32_e32 v7, v1
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_3_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:11]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v12, v[4:5], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v0, v10
-; GFX942-NEXT: v_mov_b32_e32 v1, v11
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v0
+; GFX942-NEXT: v_mov_b32_e32 v7, v1
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -2765,51 +2625,52 @@ define void @v_shuffle_v3p0_v3p0__5_4_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v3p0__5_4_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:11]
+; GFX900-NEXT: ; def v[2:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v10
-; GFX900-NEXT: v_mov_b32_e32 v7, v11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v12, v[4:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v8, v[6:7], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: v_mov_b32_e32 v7, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v3p0__5_4_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:11]
+; GFX90A-NEXT: ; def v[2:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v10
-; GFX90A-NEXT: v_mov_b32_e32 v7, v11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v12, v[4:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v8, v[6:7], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v6, v2
+; GFX90A-NEXT: v_mov_b32_e32 v7, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_4_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:11]
+; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v10
-; GFX942-NEXT: v_mov_b32_e32 v7, v11
+; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v12, v[4:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v2
+; GFX942-NEXT: v_mov_b32_e32 v7, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -2915,39 +2776,33 @@ define void @v_shuffle_v3p0_v3p0__2_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v3p0__2_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v3p0__2_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v3p0__2_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -3020,48 +2875,42 @@ define void @v_shuffle_v3p0_v3p0__5_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v3p0__5_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: v_mov_b32_e32 v7, v1
+; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v3p0__5_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v0
+; GFX90A-NEXT: v_mov_b32_e32 v7, v1
+; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v6, v0
+; GFX942-NEXT: v_mov_b32_e32 v7, v1
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -3080,9 +2929,7 @@ define void @v_shuffle_v3p0_v3p0__5_u_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3094,9 +2941,7 @@ define void @v_shuffle_v3p0_v3p0__5_u_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3108,9 +2953,7 @@ define void @v_shuffle_v3p0_v3p0__5_u_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -3124,57 +2967,45 @@ define void @v_shuffle_v3p0_v3p0__5_0_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v3p0__5_0_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:7]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v12, v[0:1], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v3p0__5_0_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:7]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v12, v[0:1], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_0_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:7]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v12, v[0:1], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -3189,16 +3020,14 @@ define void @v_shuffle_v3p0_v3p0__5_1_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:9]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v8
-; GFX900-NEXT: v_mov_b32_e32 v1, v9
-; GFX900-NEXT: global_store_dwordx2 v10, v[4:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v10, v[0:1], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3206,16 +3035,14 @@ define void @v_shuffle_v3p0_v3p0__5_1_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:9]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v8
-; GFX90A-NEXT: v_mov_b32_e32 v1, v9
-; GFX90A-NEXT: global_store_dwordx2 v10, v[4:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v10, v[0:1], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3223,16 +3050,14 @@ define void @v_shuffle_v3p0_v3p0__5_1_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:9]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v10, v[4:5], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v0, v8
-; GFX942-NEXT: v_mov_b32_e32 v1, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -3247,16 +3072,14 @@ define void @v_shuffle_v3p0_v3p0__5_2_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[2:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:11]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, v10
-; GFX900-NEXT: v_mov_b32_e32 v3, v11
-; GFX900-NEXT: global_store_dwordx2 v12, v[6:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3264,16 +3087,14 @@ define void @v_shuffle_v3p0_v3p0__5_2_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[2:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:11]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, v10
-; GFX90A-NEXT: v_mov_b32_e32 v3, v11
-; GFX90A-NEXT: global_store_dwordx2 v12, v[6:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3281,16 +3102,14 @@ define void @v_shuffle_v3p0_v3p0__5_2_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:11]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v12, v[6:7], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v10
-; GFX942-NEXT: v_mov_b32_e32 v3, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -3304,42 +3123,42 @@ define void @v_shuffle_v3p0_v3p0__5_4_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v3p0__5_4_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: v_mov_b32_e32 v7, v3
+; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v3p0__5_4_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v6, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v2
+; GFX90A-NEXT: v_mov_b32_e32 v7, v3
+; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_4_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v6, v2
+; GFX942-NEXT: v_mov_b32_e32 v7, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -3516,13 +3335,13 @@ define void @v_shuffle_v3p0_v3p0__2_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v8
+; GFX900-NEXT: v_mov_b32_e32 v7, v9
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, v4
-; GFX900-NEXT: v_mov_b32_e32 v7, v5
; GFX900-NEXT: global_store_dwordx2 v12, v[8:9], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3533,13 +3352,13 @@ define void @v_shuffle_v3p0_v3p0__2_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, v8
+; GFX90A-NEXT: v_mov_b32_e32 v7, v9
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v5
; GFX90A-NEXT: global_store_dwordx2 v12, v[8:9], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3550,13 +3369,13 @@ define void @v_shuffle_v3p0_v3p0__2_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v8
+; GFX942-NEXT: v_mov_b32_e32 v7, v9
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: global_store_dwordx2 v12, v[8:9], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v5
-; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -3662,42 +3481,42 @@ define void @v_shuffle_v3p0_v3p0__5_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v3p0__5_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx2 v6, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: v_mov_b32_e32 v7, v3
+; GFX900-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v3p0__5_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx2 v6, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v2
+; GFX90A-NEXT: v_mov_b32_e32 v7, v3
+; GFX90A-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v6, v2
+; GFX942-NEXT: v_mov_b32_e32 v7, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -3711,42 +3530,36 @@ define void @v_shuffle_v3p0_v3p0__5_u_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v3p0__5_u_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
; GFX900-NEXT: global_store_dwordx2 v6, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v3p0__5_u_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
; GFX90A-NEXT: global_store_dwordx2 v6, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_u_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
; GFX942-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -3760,57 +3573,45 @@ define void @v_shuffle_v3p0_v3p0__5_0_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v3p0__5_0_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:7]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v12, v[2:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v3p0__5_0_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:7]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v12, v[2:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_0_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:7]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -3825,16 +3626,14 @@ define void @v_shuffle_v3p0_v3p0__5_1_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:9]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v8
-; GFX900-NEXT: v_mov_b32_e32 v1, v9
-; GFX900-NEXT: global_store_dwordx2 v10, v[6:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3842,16 +3641,14 @@ define void @v_shuffle_v3p0_v3p0__5_1_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:9]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v8
-; GFX90A-NEXT: v_mov_b32_e32 v1, v9
-; GFX90A-NEXT: global_store_dwordx2 v10, v[6:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3859,16 +3656,14 @@ define void @v_shuffle_v3p0_v3p0__5_1_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:9]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v10, v[6:7], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v0, v8
-; GFX942-NEXT: v_mov_b32_e32 v1, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -3883,16 +3678,14 @@ define void @v_shuffle_v3p0_v3p0__5_2_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[2:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:11]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, v10
-; GFX900-NEXT: v_mov_b32_e32 v3, v11
-; GFX900-NEXT: global_store_dwordx2 v12, v[8:9], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3900,16 +3693,14 @@ define void @v_shuffle_v3p0_v3p0__5_2_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[2:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:11]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, v10
-; GFX90A-NEXT: v_mov_b32_e32 v3, v11
-; GFX90A-NEXT: global_store_dwordx2 v12, v[8:9], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3917,16 +3708,14 @@ define void @v_shuffle_v3p0_v3p0__5_2_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:11]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v12, v[8:9], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v10
-; GFX942-NEXT: v_mov_b32_e32 v3, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -3940,48 +3729,42 @@ define void @v_shuffle_v3p0_v3p0__5_3_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v3p0__5_3_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v6, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: v_mov_b32_e32 v7, v1
+; GFX900-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v3p0__5_3_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v6, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v0
+; GFX90A-NEXT: v_mov_b32_e32 v7, v1
+; GFX90A-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_3_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v6, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v6, v0
+; GFX942-NEXT: v_mov_b32_e32 v7, v1
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -4158,13 +3941,13 @@ define void @v_shuffle_v3p0_v3p0__2_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v10
+; GFX900-NEXT: v_mov_b32_e32 v7, v11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, v4
-; GFX900-NEXT: v_mov_b32_e32 v9, v5
; GFX900-NEXT: global_store_dwordx2 v12, v[10:11], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4175,13 +3958,13 @@ define void @v_shuffle_v3p0_v3p0__2_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, v10
+; GFX90A-NEXT: v_mov_b32_e32 v7, v11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, v4
-; GFX90A-NEXT: v_mov_b32_e32 v9, v5
; GFX90A-NEXT: global_store_dwordx2 v12, v[10:11], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4192,13 +3975,13 @@ define void @v_shuffle_v3p0_v3p0__2_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v10
+; GFX942-NEXT: v_mov_b32_e32 v7, v11
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: global_store_dwordx2 v12, v[10:11], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v8, v4
-; GFX942-NEXT: v_mov_b32_e32 v9, v5
-; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -4304,42 +4087,36 @@ define void @v_shuffle_v3p0_v3p0__5_u_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v3p0__5_u_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
; GFX900-NEXT: global_store_dwordx2 v6, v[4:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v3p0__5_u_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
; GFX90A-NEXT: global_store_dwordx2 v6, v[4:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_u_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -4353,57 +4130,45 @@ define void @v_shuffle_v3p0_v3p0__5_0_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v3p0__5_0_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:7]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx2 v8, v[6:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v12, v[4:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v3p0__5_0_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:7]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx2 v8, v[6:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v12, v[4:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_0_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:7]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v12, v[4:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -4418,16 +4183,14 @@ define void @v_shuffle_v3p0_v3p0__5_1_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:9]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v8
-; GFX900-NEXT: v_mov_b32_e32 v1, v9
-; GFX900-NEXT: global_store_dwordx2 v10, v[8:9], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v10, v[4:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4435,16 +4198,14 @@ define void @v_shuffle_v3p0_v3p0__5_1_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:9]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v8
-; GFX90A-NEXT: v_mov_b32_e32 v1, v9
-; GFX90A-NEXT: global_store_dwordx2 v10, v[8:9], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v10, v[4:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4452,16 +4213,14 @@ define void @v_shuffle_v3p0_v3p0__5_1_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:9]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v10, v[8:9], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v0, v8
-; GFX942-NEXT: v_mov_b32_e32 v1, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v10, v[4:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -4476,16 +4235,14 @@ define void @v_shuffle_v3p0_v3p0__5_2_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[2:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:11]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, v10
-; GFX900-NEXT: v_mov_b32_e32 v3, v11
-; GFX900-NEXT: global_store_dwordx2 v12, v[10:11], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4493,16 +4250,14 @@ define void @v_shuffle_v3p0_v3p0__5_2_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[2:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:11]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, v10
-; GFX90A-NEXT: v_mov_b32_e32 v3, v11
-; GFX90A-NEXT: global_store_dwordx2 v12, v[10:11], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4510,16 +4265,14 @@ define void @v_shuffle_v3p0_v3p0__5_2_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:11]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v12, v[10:11], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v10
-; GFX942-NEXT: v_mov_b32_e32 v3, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -4533,48 +4286,42 @@ define void @v_shuffle_v3p0_v3p0__5_3_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v3p0__5_3_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v6, v[4:5], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: v_mov_b32_e32 v7, v1
+; GFX900-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v3p0__5_3_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v6, v[4:5], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v0
+; GFX90A-NEXT: v_mov_b32_e32 v7, v1
+; GFX90A-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_3_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v6, v0
+; GFX942-NEXT: v_mov_b32_e32 v7, v1
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -4588,42 +4335,42 @@ define void @v_shuffle_v3p0_v3p0__5_4_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v3p0__5_4_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx2 v6, v[4:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: v_mov_b32_e32 v7, v3
+; GFX900-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v3p0__5_4_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx2 v6, v[4:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v2
+; GFX90A-NEXT: v_mov_b32_e32 v7, v3
+; GFX90A-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_4_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v6, v2
+; GFX942-NEXT: v_mov_b32_e32 v7, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -4759,10 +4506,9 @@ define void @s_shuffle_v3p0_v3p0__2_u_u() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -4860,10 +4606,9 @@ define void @s_shuffle_v3p0_v3p0__5_u_u() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -4880,15 +4625,13 @@ define void @s_shuffle_v3p0_v3p0__5_0_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -4898,15 +4641,13 @@ define void @s_shuffle_v3p0_v3p0__5_0_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -4969,11 +4710,11 @@ define void @s_shuffle_v3p0_v3p0__5_1_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -5024,13 +4765,11 @@ define void @s_shuffle_v3p0_v3p0__5_2_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s10, s12
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b32 s11, s13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -5043,46 +4782,18 @@ define void @s_shuffle_v3p0_v3p0__5_2_u() {
}
define void @s_shuffle_v3p0_v3p0__5_3_u() {
-; GFX900-LABEL: s_shuffle_v3p0_v3p0__5_3_u:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3p0_v3p0__5_3_u:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_3_u:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v3p0_v3p0__5_3_u:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s4
+; GFX9-NEXT: s_mov_b32 s11, s5
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> <i32 5, i32 3, i32 poison>
@@ -5095,10 +4806,10 @@ define void @s_shuffle_v3p0_v3p0__5_4_u() {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
+; GFX9-NEXT: ; def s[4:9]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
+; GFX9-NEXT: s_mov_b32 s10, s6
+; GFX9-NEXT: s_mov_b32 s11, s7
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:13]
; GFX9-NEXT: ;;#ASMEND
@@ -5111,50 +4822,18 @@ define void @s_shuffle_v3p0_v3p0__5_4_u() {
}
define void @s_shuffle_v3p0_v3p0__5_5_u() {
-; GFX900-LABEL: s_shuffle_v3p0_v3p0__5_5_u:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3p0_v3p0__5_5_u:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_5_u:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v3p0_v3p0__5_5_u:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> <i32 5, i32 5, i32 poison>
@@ -5163,65 +4842,21 @@ define void @s_shuffle_v3p0_v3p0__5_5_u() {
}
define void @s_shuffle_v3p0_v3p0__5_5_0() {
-; GFX900-LABEL: s_shuffle_v3p0_v3p0__5_5_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3p0_v3p0__5_5_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_5_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v3p0_v3p0__5_5_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> <i32 5, i32 5, i32 0>
@@ -5234,17 +4869,15 @@ define void @s_shuffle_v3p0_v3p0__5_5_1() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -5254,17 +4887,15 @@ define void @s_shuffle_v3p0_v3p0__5_5_1() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -5274,16 +4905,14 @@ define void @s_shuffle_v3p0_v3p0__5_5_1() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:9]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s10, s8
+; GFX942-NEXT: s_mov_b32 s11, s9
; GFX942-NEXT: s_mov_b32 s13, s3
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
@@ -5304,12 +4933,10 @@ define void @s_shuffle_v3p0_v3p0__5_5_2() {
; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:21]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s20
-; GFX900-NEXT: s_mov_b32 s9, s21
-; GFX900-NEXT: s_mov_b32 s10, s20
-; GFX900-NEXT: s_mov_b32 s11, s21
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -5322,12 +4949,10 @@ define void @s_shuffle_v3p0_v3p0__5_5_2() {
; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:21]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s20
-; GFX90A-NEXT: s_mov_b32 s9, s21
-; GFX90A-NEXT: s_mov_b32 s10, s20
-; GFX90A-NEXT: s_mov_b32 s11, s21
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -5339,13 +4964,12 @@ define void @s_shuffle_v3p0_v3p0__5_5_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b32 s10, s8
+; GFX942-NEXT: s_mov_b32 s11, s9
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -5358,52 +4982,20 @@ define void @s_shuffle_v3p0_v3p0__5_5_2() {
}
define void @s_shuffle_v3p0_v3p0__5_5_3() {
-; GFX900-LABEL: s_shuffle_v3p0_v3p0__5_5_3:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3p0_v3p0__5_5_3:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_5_3:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v3p0_v3p0__5_5_3:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s12, s4
+; GFX9-NEXT: s_mov_b32 s13, s5
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> <i32 5, i32 5, i32 3>
@@ -5412,74 +5004,38 @@ define void @s_shuffle_v3p0_v3p0__5_5_3() {
}
define void @s_shuffle_v3p0_v3p0__5_5_4() {
-; GFX900-LABEL: s_shuffle_v3p0_v3p0__5_5_4:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3p0_v3p0__5_5_4:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_5_4:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> <i32 5, i32 5, i32 4>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
+; GFX9-LABEL: s_shuffle_v3p0_v3p0__5_5_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s12, s6
+; GFX9-NEXT: s_mov_b32 s13, s7
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> <i32 5, i32 5, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
define void @s_shuffle_v3p0_v3p0__5_5_5() {
; GFX9-LABEL: s_shuffle_v3p0_v3p0__5_5_5:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
+; GFX9-NEXT: ; def s[4:9]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s12, s8
+; GFX9-NEXT: s_mov_b32 s13, s9
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:13]
; GFX9-NEXT: ;;#ASMEND
@@ -5492,50 +5048,18 @@ define void @s_shuffle_v3p0_v3p0__5_5_5() {
}
define void @s_shuffle_v3p0_v3p0__u_0_0() {
-; GFX900-LABEL: s_shuffle_v3p0_v3p0__u_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3p0_v3p0__u_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3p0_v3p0__u_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v3p0_v3p0__u_0_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> <i32 poison, i32 0, i32 0>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
@@ -5564,56 +5088,20 @@ define void @s_shuffle_v3p0_v3p0__0_0_0() {
}
define void @s_shuffle_v3p0_v3p0__1_0_0() {
-; GFX900-LABEL: s_shuffle_v3p0_v3p0__1_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3p0_v3p0__1_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3p0_v3p0__1_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v3p0_v3p0__1_0_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s14
+; GFX9-NEXT: s_mov_b32 s9, s15
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> <i32 1, i32 0, i32 0>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
@@ -5621,52 +5109,20 @@ define void @s_shuffle_v3p0_v3p0__1_0_0() {
}
define void @s_shuffle_v3p0_v3p0__2_0_0() {
-; GFX900-LABEL: s_shuffle_v3p0_v3p0__2_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3p0_v3p0__2_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3p0_v3p0__2_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v3p0_v3p0__2_0_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s4
+; GFX9-NEXT: s_mov_b32 s11, s5
+; GFX9-NEXT: s_mov_b32 s12, s4
+; GFX9-NEXT: s_mov_b32 s13, s5
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> <i32 2, i32 0, i32 0>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
@@ -5674,50 +5130,18 @@ define void @s_shuffle_v3p0_v3p0__2_0_0() {
}
define void @s_shuffle_v3p0_v3p0__3_0_0() {
-; GFX900-LABEL: s_shuffle_v3p0_v3p0__3_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3p0_v3p0__3_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3p0_v3p0__3_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v3p0_v3p0__3_0_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> <i32 3, i32 0, i32 0>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
@@ -5732,14 +5156,12 @@ define void @s_shuffle_v3p0_v3p0__4_0_0() {
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -5752,14 +5174,12 @@ define void @s_shuffle_v3p0_v3p0__4_0_0() {
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -5769,17 +5189,15 @@ define void @s_shuffle_v3p0_v3p0__4_0_0() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[12:17]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:9]
+; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b32 s8, s2
+; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b32 s10, s12
+; GFX942-NEXT: s_mov_b32 s11, s13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -5792,63 +5210,21 @@ define void @s_shuffle_v3p0_v3p0__4_0_0() {
}
define void @s_shuffle_v3p0_v3p0__5_0_0() {
-; GFX900-LABEL: s_shuffle_v3p0_v3p0__5_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3p0_v3p0__5_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:9]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v3p0_v3p0__5_0_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> <i32 5, i32 0, i32 0>
@@ -5857,307 +5233,140 @@ define void @s_shuffle_v3p0_v3p0__5_0_0() {
}
define void @s_shuffle_v3p0_v3p0__5_u_0() {
-; GFX900-LABEL: s_shuffle_v3p0_v3p0__5_u_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3p0_v3p0__5_u_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_u_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:9]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> <i32 5, i32 poison, i32 0>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v3p0__5_1_0() {
-; GFX900-LABEL: s_shuffle_v3p0_v3p0__5_1_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s6
-; GFX900-NEXT: s_mov_b32 s11, s7
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3p0_v3p0__5_1_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s6
-; GFX90A-NEXT: s_mov_b32 s11, s7
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_1_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:9]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> <i32 5, i32 1, i32 0>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v3p0__5_2_0() {
-; GFX900-LABEL: s_shuffle_v3p0_v3p0__5_2_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3p0_v3p0__5_2_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_2_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> <i32 5, i32 2, i32 0>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v3p0__5_3_0() {
-; GFX900-LABEL: s_shuffle_v3p0_v3p0__5_3_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3p0_v3p0__5_3_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_3_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:9]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> <i32 5, i32 3, i32 0>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v3p0__5_4_0() {
-; GFX900-LABEL: s_shuffle_v3p0_v3p0__5_4_0:
+; GFX900-LABEL: s_shuffle_v3p0_v3p0__5_u_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v3p0__5_4_0:
+; GFX90A-LABEL: s_shuffle_v3p0_v3p0__5_u_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_4_0:
+; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_u_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ; def s[12:17]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> <i32 5, i32 poison, i32 0>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v3p0__5_1_0() {
+; GFX9-LABEL: s_shuffle_v3p0_v3p0__5_1_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s14
+; GFX9-NEXT: s_mov_b32 s11, s15
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> <i32 5, i32 1, i32 0>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v3p0__5_2_0() {
+; GFX9-LABEL: s_shuffle_v3p0_v3p0__5_2_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s16
+; GFX9-NEXT: s_mov_b32 s11, s17
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> <i32 5, i32 2, i32 0>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v3p0__5_3_0() {
+; GFX9-LABEL: s_shuffle_v3p0_v3p0__5_3_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s4
+; GFX9-NEXT: s_mov_b32 s11, s5
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> <i32 5, i32 3, i32 0>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v3p0__5_4_0() {
+; GFX9-LABEL: s_shuffle_v3p0_v3p0__5_4_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s6
+; GFX9-NEXT: s_mov_b32 s11, s7
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> <i32 5, i32 4, i32 0>
@@ -6229,12 +5438,12 @@ define void @s_shuffle_v3p0_v3p0__2_1_1() {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
+; GFX9-NEXT: ; def s[4:9]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: s_mov_b32 s10, s6
+; GFX9-NEXT: s_mov_b32 s11, s7
+; GFX9-NEXT: s_mov_b32 s12, s6
+; GFX9-NEXT: s_mov_b32 s13, s7
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:13]
; GFX9-NEXT: ;;#ASMEND
@@ -6364,12 +5573,10 @@ define void @s_shuffle_v3p0_v3p0__5_1_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s12, s10
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s10
; GFX942-NEXT: s_mov_b32 s13, s11
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
@@ -6387,15 +5594,13 @@ define void @s_shuffle_v3p0_v3p0__5_u_1() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -6405,15 +5610,13 @@ define void @s_shuffle_v3p0_v3p0__5_u_1() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -6446,17 +5649,15 @@ define void @s_shuffle_v3p0_v3p0__5_0_1() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -6466,17 +5667,15 @@ define void @s_shuffle_v3p0_v3p0__5_0_1() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -6507,61 +5706,23 @@ define void @s_shuffle_v3p0_v3p0__5_0_1() {
}
define void @s_shuffle_v3p0_v3p0__5_2_1() {
-; GFX900-LABEL: s_shuffle_v3p0_v3p0__5_2_1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3p0_v3p0__5_2_1:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_2_1:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v3p0_v3p0__5_2_1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s16
+; GFX9-NEXT: s_mov_b32 s11, s17
+; GFX9-NEXT: s_mov_b32 s12, s14
+; GFX9-NEXT: s_mov_b32 s13, s15
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> <i32 5, i32 2, i32 1>
@@ -6574,17 +5735,15 @@ define void @s_shuffle_v3p0_v3p0__5_3_1() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -6594,17 +5753,15 @@ define void @s_shuffle_v3p0_v3p0__5_3_1() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -6639,15 +5796,15 @@ define void @s_shuffle_v3p0_v3p0__5_4_1() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s10, s6
+; GFX900-NEXT: s_mov_b32 s11, s7
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -6657,15 +5814,15 @@ define void @s_shuffle_v3p0_v3p0__5_4_1() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s10, s6
+; GFX90A-NEXT: s_mov_b32 s11, s7
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -6675,14 +5832,14 @@ define void @s_shuffle_v3p0_v3p0__5_4_1() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:9]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s10, s6
+; GFX942-NEXT: s_mov_b32 s11, s7
; GFX942-NEXT: s_mov_b32 s13, s3
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
@@ -6759,12 +5916,12 @@ define void @s_shuffle_v3p0_v3p0__2_2_2() {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
+; GFX9-NEXT: ; def s[4:9]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s12, s8
+; GFX9-NEXT: s_mov_b32 s13, s9
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:13]
; GFX9-NEXT: ;;#ASMEND
@@ -6894,12 +6051,10 @@ define void @s_shuffle_v3p0_v3p0__5_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s10, s12
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s12
; GFX942-NEXT: s_mov_b32 s11, s13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
@@ -6947,11 +6102,11 @@ define void @s_shuffle_v3p0_v3p0__5_u_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -6964,61 +6119,23 @@ define void @s_shuffle_v3p0_v3p0__5_u_2() {
}
define void @s_shuffle_v3p0_v3p0__5_0_2() {
-; GFX900-LABEL: s_shuffle_v3p0_v3p0__5_0_2:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3p0_v3p0__5_0_2:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_0_2:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v3p0_v3p0__5_0_2:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: s_mov_b32 s12, s16
+; GFX9-NEXT: s_mov_b32 s13, s17
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> <i32 5, i32 0, i32 2>
@@ -7061,11 +6178,11 @@ define void @s_shuffle_v3p0_v3p0__5_1_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7116,13 +6233,12 @@ define void @s_shuffle_v3p0_v3p0__5_3_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b32 s10, s4
+; GFX942-NEXT: s_mov_b32 s11, s5
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7139,15 +6255,13 @@ define void @s_shuffle_v3p0_v3p0__5_4_2() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
+; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
+; GFX900-NEXT: s_mov_b32 s10, s6
+; GFX900-NEXT: s_mov_b32 s11, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -7157,15 +6271,13 @@ define void @s_shuffle_v3p0_v3p0__5_4_2() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
+; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
+; GFX90A-NEXT: s_mov_b32 s10, s6
+; GFX90A-NEXT: s_mov_b32 s11, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -7177,13 +6289,12 @@ define void @s_shuffle_v3p0_v3p0__5_4_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b32 s10, s6
+; GFX942-NEXT: s_mov_b32 s11, s7
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7256,252 +6367,155 @@ define void @s_shuffle_v3p0_v3p0__1_3_3() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3p0_v3p0__1_3_3:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3p0_v3p0__1_3_3:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> <i32 1, i32 3, i32 3>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v3p0__2_3_3() {
-; GFX900-LABEL: s_shuffle_v3p0_v3p0__2_3_3:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3p0_v3p0__2_3_3:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3p0_v3p0__2_3_3:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> <i32 2, i32 3, i32 3>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v3p0__3_3_3() {
-; GFX9-LABEL: s_shuffle_v3p0_v3p0__3_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> <i32 3, i32 3, i32 3>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v3p0__4_3_3() {
-; GFX900-LABEL: s_shuffle_v3p0_v3p0__4_3_3:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3p0_v3p0__4_3_3:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3p0_v3p0__4_3_3:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> <i32 4, i32 3, i32 3>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v3p0__5_3_3() {
-; GFX900-LABEL: s_shuffle_v3p0_v3p0__5_3_3:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v3p0__5_3_3:
+; GFX90A-LABEL: s_shuffle_v3p0_v3p0__1_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_3_3:
+; GFX942-LABEL: s_shuffle_v3p0_v3p0__1_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b32 s8, s2
+; GFX942-NEXT: s_mov_b32 s9, s3
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> <i32 5, i32 3, i32 3>
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> <i32 1, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v3p0__5_u_3() {
-; GFX900-LABEL: s_shuffle_v3p0_v3p0__5_u_3:
+define void @s_shuffle_v3p0_v3p0__2_3_3() {
+; GFX900-LABEL: s_shuffle_v3p0_v3p0__2_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v3p0__5_u_3:
+; GFX90A-LABEL: s_shuffle_v3p0_v3p0__2_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_u_3:
+; GFX942-LABEL: s_shuffle_v3p0_v3p0__2_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> <i32 2, i32 3, i32 3>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v3p0__3_3_3() {
+; GFX9-LABEL: s_shuffle_v3p0_v3p0__3_3_3:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> <i32 3, i32 3, i32 3>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v3p0__4_3_3() {
+; GFX9-LABEL: s_shuffle_v3p0_v3p0__4_3_3:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s14
+; GFX9-NEXT: s_mov_b32 s9, s15
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> <i32 4, i32 3, i32 3>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v3p0__5_3_3() {
+; GFX9-LABEL: s_shuffle_v3p0_v3p0__5_3_3:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s4
+; GFX9-NEXT: s_mov_b32 s11, s5
+; GFX9-NEXT: s_mov_b32 s12, s4
+; GFX9-NEXT: s_mov_b32 s13, s5
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> <i32 5, i32 3, i32 3>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v3p0__5_u_3() {
+; GFX9-LABEL: s_shuffle_v3p0_v3p0__5_u_3:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s12, s4
+; GFX9-NEXT: s_mov_b32 s13, s5
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> <i32 5, i32 poison, i32 3>
@@ -7514,15 +6528,15 @@ define void @s_shuffle_v3p0_v3p0__5_0_3() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -7532,15 +6546,15 @@ define void @s_shuffle_v3p0_v3p0__5_0_3() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -7609,13 +6623,12 @@ define void @s_shuffle_v3p0_v3p0__5_1_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b32 s12, s4
+; GFX942-NEXT: s_mov_b32 s13, s5
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7668,15 +6681,15 @@ define void @s_shuffle_v3p0_v3p0__5_2_3() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s10, s12
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:17]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b32 s12, s4
+; GFX942-NEXT: s_mov_b32 s13, s5
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7689,52 +6702,20 @@ define void @s_shuffle_v3p0_v3p0__5_2_3() {
}
define void @s_shuffle_v3p0_v3p0__5_4_3() {
-; GFX900-LABEL: s_shuffle_v3p0_v3p0__5_4_3:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s6
-; GFX900-NEXT: s_mov_b32 s11, s7
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3p0_v3p0__5_4_3:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s6
-; GFX90A-NEXT: s_mov_b32 s11, s7
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_4_3:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v3p0_v3p0__5_4_3:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s6
+; GFX9-NEXT: s_mov_b32 s11, s7
+; GFX9-NEXT: s_mov_b32 s12, s4
+; GFX9-NEXT: s_mov_b32 s13, s5
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> <i32 5, i32 4, i32 3>
@@ -7892,12 +6873,12 @@ define void @s_shuffle_v3p0_v3p0__2_4_4() {
; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s12, s10
-; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -7910,12 +6891,12 @@ define void @s_shuffle_v3p0_v3p0__2_4_4() {
; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s12, s10
-; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -7925,15 +6906,16 @@ define void @s_shuffle_v3p0_v3p0__2_4_4() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b32 s10, s2
+; GFX942-NEXT: s_mov_b32 s11, s3
+; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: s_mov_b32 s13, s3
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7986,70 +6968,42 @@ define void @s_shuffle_v3p0_v3p0__4_4_4() {
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-
-define void @s_shuffle_v3p0_v3p0__5_4_4() {
-; GFX9-LABEL: s_shuffle_v3p0_v3p0__5_4_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> <i32 5, i32 4, i32 4>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v3p0__5_u_4() {
-; GFX900-LABEL: s_shuffle_v3p0_v3p0__5_u_4:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3p0_v3p0__5_u_4:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_u_4:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+
+define void @s_shuffle_v3p0_v3p0__5_4_4() {
+; GFX9-LABEL: s_shuffle_v3p0_v3p0__5_4_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s6
+; GFX9-NEXT: s_mov_b32 s11, s7
+; GFX9-NEXT: s_mov_b32 s12, s6
+; GFX9-NEXT: s_mov_b32 s13, s7
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> <i32 5, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v3p0__5_u_4() {
+; GFX9-LABEL: s_shuffle_v3p0_v3p0__5_u_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s12, s6
+; GFX9-NEXT: s_mov_b32 s13, s7
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> <i32 5, i32 poison, i32 4>
@@ -8062,17 +7016,15 @@ define void @s_shuffle_v3p0_v3p0__5_0_4() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -8082,17 +7034,15 @@ define void @s_shuffle_v3p0_v3p0__5_0_4() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -8161,13 +7111,12 @@ define void @s_shuffle_v3p0_v3p0__5_1_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b32 s12, s6
+; GFX942-NEXT: s_mov_b32 s13, s7
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8220,17 +7169,15 @@ define void @s_shuffle_v3p0_v3p0__5_2_4() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:17]
+; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s10, s12
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b32 s12, s6
+; GFX942-NEXT: s_mov_b32 s13, s7
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8243,52 +7190,20 @@ define void @s_shuffle_v3p0_v3p0__5_2_4() {
}
define void @s_shuffle_v3p0_v3p0__5_3_4() {
-; GFX900-LABEL: s_shuffle_v3p0_v3p0__5_3_4:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3p0_v3p0__5_3_4:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_3_4:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v3p0_v3p0__5_3_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s4
+; GFX9-NEXT: s_mov_b32 s11, s5
+; GFX9-NEXT: s_mov_b32 s12, s6
+; GFX9-NEXT: s_mov_b32 s13, s7
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> <i32 5, i32 3, i32 4>
@@ -8446,12 +7361,12 @@ define void @s_shuffle_v3p0_v3p0__2_5_5() {
; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -8464,12 +7379,12 @@ define void @s_shuffle_v3p0_v3p0__2_5_5() {
; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -8479,15 +7394,16 @@ define void @s_shuffle_v3p0_v3p0__2_5_5() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b32 s10, s4
+; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b32 s12, s4
+; GFX942-NEXT: s_mov_b32 s13, s5
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8546,10 +7462,10 @@ define void @s_shuffle_v3p0_v3p0__5_u_5() {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
+; GFX9-NEXT: ; def s[4:9]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
+; GFX9-NEXT: s_mov_b32 s12, s8
+; GFX9-NEXT: s_mov_b32 s13, s9
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:13]
; GFX9-NEXT: ;;#ASMEND
@@ -8566,15 +7482,15 @@ define void @s_shuffle_v3p0_v3p0__5_0_5() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s8
+; GFX900-NEXT: s_mov_b32 s13, s9
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -8584,15 +7500,15 @@ define void @s_shuffle_v3p0_v3p0__5_0_5() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s8
+; GFX90A-NEXT: s_mov_b32 s13, s9
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -8602,15 +7518,15 @@ define void @s_shuffle_v3p0_v3p0__5_0_5() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:9]
+; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b32 s12, s8
+; GFX942-NEXT: s_mov_b32 s13, s9
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8630,12 +7546,10 @@ define void @s_shuffle_v3p0_v3p0__5_1_5() {
; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
+; GFX900-NEXT: s_mov_b32 s12, s8
+; GFX900-NEXT: s_mov_b32 s13, s9
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -8648,12 +7562,10 @@ define void @s_shuffle_v3p0_v3p0__5_1_5() {
; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
+; GFX90A-NEXT: s_mov_b32 s12, s8
+; GFX90A-NEXT: s_mov_b32 s13, s9
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -8665,13 +7577,12 @@ define void @s_shuffle_v3p0_v3p0__5_1_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b32 s12, s8
+; GFX942-NEXT: s_mov_b32 s13, s9
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8688,15 +7599,15 @@ define void @s_shuffle_v3p0_v3p0__5_2_5() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
+; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s8
+; GFX900-NEXT: s_mov_b32 s13, s9
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -8706,15 +7617,15 @@ define void @s_shuffle_v3p0_v3p0__5_2_5() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
+; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s8
+; GFX90A-NEXT: s_mov_b32 s13, s9
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -8726,13 +7637,13 @@ define void @s_shuffle_v3p0_v3p0__5_2_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s10, s12
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b32 s12, s8
+; GFX942-NEXT: s_mov_b32 s13, s9
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8745,56 +7656,20 @@ define void @s_shuffle_v3p0_v3p0__5_2_5() {
}
define void @s_shuffle_v3p0_v3p0__5_3_5() {
-; GFX900-LABEL: s_shuffle_v3p0_v3p0__5_3_5:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3p0_v3p0__5_3_5:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_3_5:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v3p0_v3p0__5_3_5:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s4
+; GFX9-NEXT: s_mov_b32 s11, s5
+; GFX9-NEXT: s_mov_b32 s12, s8
+; GFX9-NEXT: s_mov_b32 s13, s9
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> <i32 5, i32 3, i32 5>
@@ -8807,10 +7682,12 @@ define void @s_shuffle_v3p0_v3p0__5_4_5() {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
+; GFX9-NEXT: ; def s[4:9]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
+; GFX9-NEXT: s_mov_b32 s10, s6
+; GFX9-NEXT: s_mov_b32 s11, s7
+; GFX9-NEXT: s_mov_b32 s12, s8
+; GFX9-NEXT: s_mov_b32 s13, s9
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:13]
; GFX9-NEXT: ;;#ASMEND
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v4p0.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v4p0.ll
index b03066e66cf66..b60d7f80b9cc2 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v4p0.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v4p0.ll
@@ -139,39 +139,33 @@ define void @v_shuffle_v3p0_v4p0__3_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v4p0__3_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v4p0__3_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v4p0__3_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -275,39 +269,33 @@ define void @v_shuffle_v3p0_v4p0__7_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v4p0__7_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v4p0__7_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -321,55 +309,42 @@ define void @v_shuffle_v3p0_v4p0__7_0_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v4p0__7_0_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v8, v6
-; GFX900-NEXT: v_mov_b32_e32 v9, v7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, v0
-; GFX900-NEXT: v_mov_b32_e32 v11, v1
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v4p0__7_0_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:9]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v8
-; GFX90A-NEXT: v_mov_b32_e32 v3, v9
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_0_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:9]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v8
-; GFX942-NEXT: v_mov_b32_e32 v3, v9
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -383,49 +358,43 @@ define void @v_shuffle_v3p0_v4p0__7_1_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v4p0__7_1_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v10
-; GFX900-NEXT: v_mov_b32_e32 v1, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v4p0__7_1_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v10
-; GFX90A-NEXT: v_mov_b32_e32 v1, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_1_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v10
-; GFX942-NEXT: v_mov_b32_e32 v1, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -439,49 +408,43 @@ define void @v_shuffle_v3p0_v4p0__7_2_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v4p0__7_2_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, v12
-; GFX900-NEXT: v_mov_b32_e32 v3, v13
-; GFX900-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v4p0__7_2_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, v12
-; GFX90A-NEXT: v_mov_b32_e32 v3, v13
-; GFX90A-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_2_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v12
-; GFX942-NEXT: v_mov_b32_e32 v3, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -495,49 +458,43 @@ define void @v_shuffle_v3p0_v4p0__7_3_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v4p0__7_3_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v14
-; GFX900-NEXT: v_mov_b32_e32 v5, v15
-; GFX900-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v4p0__7_3_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v14
-; GFX90A-NEXT: v_mov_b32_e32 v5, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_3_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v14
-; GFX942-NEXT: v_mov_b32_e32 v5, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -551,45 +508,40 @@ define void @v_shuffle_v3p0_v4p0__7_4_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v4p0__7_4_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v0
+; GFX900-NEXT: v_mov_b32_e32 v9, v1
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v4p0__7_4_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v0
+; GFX90A-NEXT: v_mov_b32_e32 v9, v1
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_4_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v0
+; GFX942-NEXT: v_mov_b32_e32 v9, v1
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -603,39 +555,40 @@ define void @v_shuffle_v3p0_v4p0__7_5_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v4p0__7_5_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v2
+; GFX900-NEXT: v_mov_b32_e32 v9, v3
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v4p0__7_5_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v2
+; GFX90A-NEXT: v_mov_b32_e32 v9, v3
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_5_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v2
+; GFX942-NEXT: v_mov_b32_e32 v9, v3
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -649,39 +602,40 @@ define void @v_shuffle_v3p0_v4p0__7_6_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v4p0__7_6_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v4
+; GFX900-NEXT: v_mov_b32_e32 v9, v5
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v4p0__7_6_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v4
+; GFX90A-NEXT: v_mov_b32_e32 v9, v5
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_6_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v4
+; GFX942-NEXT: v_mov_b32_e32 v9, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -695,39 +649,40 @@ define void @v_shuffle_v3p0_v4p0__7_7_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v4p0__7_7_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v4p0__7_7_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_7_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -741,51 +696,51 @@ define void @v_shuffle_v3p0_v4p0__7_7_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v4p0__7_7_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:9]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v8
-; GFX900-NEXT: v_mov_b32_e32 v7, v9
-; GFX900-NEXT: global_store_dwordx2 v10, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v16, v[8:9], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v4p0__7_7_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:9]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v8
-; GFX90A-NEXT: v_mov_b32_e32 v7, v9
-; GFX90A-NEXT: global_store_dwordx2 v10, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v16, v[8:9], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_7_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:9]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v6, v8
-; GFX942-NEXT: v_mov_b32_e32 v7, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v16, v[8:9], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -799,51 +754,52 @@ define void @v_shuffle_v3p0_v4p0__7_7_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v4p0__7_7_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v8, v10
-; GFX900-NEXT: v_mov_b32_e32 v9, v11
-; GFX900-NEXT: global_store_dwordx2 v12, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v14, v[8:9], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v4p0__7_7_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v10
-; GFX90A-NEXT: v_mov_b32_e32 v9, v11
-; GFX90A-NEXT: global_store_dwordx2 v12, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v14, v[8:9], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_7_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: global_store_dwordx2 v14, v[8:9], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v8, v10
-; GFX942-NEXT: v_mov_b32_e32 v9, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -857,51 +813,52 @@ define void @v_shuffle_v3p0_v4p0__7_7_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v4p0__7_7_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
-; GFX900-NEXT: v_mov_b32_e32 v10, v12
-; GFX900-NEXT: v_mov_b32_e32 v11, v13
-; GFX900-NEXT: global_store_dwordx2 v14, v[4:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v12, v[8:9], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v4p0__7_7_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
-; GFX90A-NEXT: v_mov_b32_e32 v10, v12
-; GFX90A-NEXT: v_mov_b32_e32 v11, v13
-; GFX90A-NEXT: global_store_dwordx2 v14, v[4:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v12, v[8:9], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_7_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: global_store_dwordx2 v12, v[8:9], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v10, v12
-; GFX942-NEXT: v_mov_b32_e32 v11, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -915,51 +872,52 @@ define void @v_shuffle_v3p0_v4p0__7_7_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v4p0__7_7_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
-; GFX900-NEXT: v_mov_b32_e32 v12, v14
-; GFX900-NEXT: v_mov_b32_e32 v13, v15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v16, v[6:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v10, v[8:9], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v4p0__7_7_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v12, v14
-; GFX90A-NEXT: v_mov_b32_e32 v13, v15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v16, v[6:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v10, v[8:9], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_7_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v14
-; GFX942-NEXT: v_mov_b32_e32 v13, v15
+; GFX942-NEXT: global_store_dwordx2 v10, v[8:9], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -973,42 +931,42 @@ define void @v_shuffle_v3p0_v4p0__7_7_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v4p0__7_7_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx2 v10, v[0:1], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v4p0__7_7_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx2 v10, v[0:1], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_7_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -1022,42 +980,42 @@ define void @v_shuffle_v3p0_v4p0__7_7_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v4p0__7_7_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v4p0__7_7_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_7_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -1071,42 +1029,42 @@ define void @v_shuffle_v3p0_v4p0__7_7_6(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v4p0__7_7_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx2 v10, v[4:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v4p0__7_7_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx2 v10, v[4:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_7_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v10, v[4:5], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -1120,42 +1078,42 @@ define void @v_shuffle_v3p0_v4p0__7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v4p0__7_7_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[6:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx2 v10, v[6:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v4p0__7_7_7:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx2 v8, v[6:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx2 v10, v[6:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_7_7:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v10, v[6:7], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -1169,42 +1127,36 @@ define void @v_shuffle_v3p0_v4p0__u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v4p0__u_0_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v0, v[2:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v4p0__u_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v0, v[2:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v4p0__u_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v0, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -1361,48 +1313,42 @@ define void @v_shuffle_v3p0_v4p0__3_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v4p0__3_0_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v0
+; GFX900-NEXT: v_mov_b32_e32 v9, v1
+; GFX900-NEXT: global_store_dwordx2 v10, v[0:1], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v4p0__3_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v0
+; GFX90A-NEXT: v_mov_b32_e32 v9, v1
+; GFX90A-NEXT: global_store_dwordx2 v10, v[0:1], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v4p0__3_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v0
+; GFX942-NEXT: v_mov_b32_e32 v9, v1
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -1415,42 +1361,36 @@ define void @v_shuffle_v3p0_v4p0__4_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v4p0__4_0_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v0, v[2:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v4p0__4_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v0, v[2:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v4p0__4_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v0, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v0, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -1579,57 +1519,45 @@ define void @v_shuffle_v3p0_v4p0__7_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v4p0__7_0_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v8, v6
-; GFX900-NEXT: v_mov_b32_e32 v9, v7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, v0
-; GFX900-NEXT: v_mov_b32_e32 v11, v1
-; GFX900-NEXT: global_store_dwordx2 v12, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v16, v[8:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v4p0__7_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:9]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v8
-; GFX90A-NEXT: v_mov_b32_e32 v3, v9
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx2 v10, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v16, v[8:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:9]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v8
-; GFX942-NEXT: v_mov_b32_e32 v3, v9
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v16, v[8:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -1643,52 +1571,45 @@ define void @v_shuffle_v3p0_v4p0__7_u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v4p0__7_u_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:9]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v10, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v0, v8
-; GFX900-NEXT: v_mov_b32_e32 v1, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v16, v[8:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v4p0__7_u_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:9]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v10, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v0, v8
-; GFX90A-NEXT: v_mov_b32_e32 v1, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v16, v[8:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_u_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:9]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v8
-; GFX942-NEXT: v_mov_b32_e32 v1, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v16, v[8:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -1702,52 +1623,51 @@ define void @v_shuffle_v3p0_v4p0__7_1_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v4p0__7_1_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx2 v16, v[8:9], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v8, v10
+; GFX900-NEXT: v_mov_b32_e32 v9, v11
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v12, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v0, v10
-; GFX900-NEXT: v_mov_b32_e32 v1, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v4p0__7_1_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: global_store_dwordx2 v16, v[8:9], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v8, v10
+; GFX90A-NEXT: v_mov_b32_e32 v9, v11
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v12, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v0, v10
-; GFX90A-NEXT: v_mov_b32_e32 v1, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_1_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v12, v[0:1], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx2 v16, v[8:9], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v10
+; GFX942-NEXT: v_mov_b32_e32 v9, v11
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v10
-; GFX942-NEXT: v_mov_b32_e32 v1, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -1761,51 +1681,51 @@ define void @v_shuffle_v3p0_v4p0__7_2_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v4p0__7_2_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
+; GFX900-NEXT: global_store_dwordx2 v16, v[8:9], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v8, v12
+; GFX900-NEXT: v_mov_b32_e32 v9, v13
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, v12
-; GFX900-NEXT: v_mov_b32_e32 v3, v13
-; GFX900-NEXT: global_store_dwordx2 v14, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v4p0__7_2_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
+; GFX90A-NEXT: global_store_dwordx2 v16, v[8:9], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v8, v12
+; GFX90A-NEXT: v_mov_b32_e32 v9, v13
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, v12
-; GFX90A-NEXT: v_mov_b32_e32 v3, v13
-; GFX90A-NEXT: global_store_dwordx2 v14, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_2_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: global_store_dwordx2 v16, v[8:9], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v12
+; GFX942-NEXT: v_mov_b32_e32 v9, v13
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v14, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v12
-; GFX942-NEXT: v_mov_b32_e32 v3, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -1819,51 +1739,51 @@ define void @v_shuffle_v3p0_v4p0__7_3_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v4p0__7_3_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
-; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v14
-; GFX900-NEXT: v_mov_b32_e32 v5, v15
-; GFX900-NEXT: global_store_dwordx2 v16, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v16, v[8:9], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v8, v14
+; GFX900-NEXT: v_mov_b32_e32 v9, v15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v4p0__7_3_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
-; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v14
-; GFX90A-NEXT: v_mov_b32_e32 v5, v15
-; GFX90A-NEXT: global_store_dwordx2 v16, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v16, v[8:9], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v8, v14
+; GFX90A-NEXT: v_mov_b32_e32 v9, v15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_3_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
-; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v16, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v4, v14
-; GFX942-NEXT: v_mov_b32_e32 v5, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v16, v[8:9], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v14
+; GFX942-NEXT: v_mov_b32_e32 v9, v15
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -1877,52 +1797,51 @@ define void @v_shuffle_v3p0_v4p0__7_4_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v4p0__7_4_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:9]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v10, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v0, v8
-; GFX900-NEXT: v_mov_b32_e32 v1, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v16, v[8:9], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v8, v0
+; GFX900-NEXT: v_mov_b32_e32 v9, v1
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v4p0__7_4_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:9]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v10, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v0, v8
-; GFX90A-NEXT: v_mov_b32_e32 v1, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v16, v[8:9], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v8, v0
+; GFX90A-NEXT: v_mov_b32_e32 v9, v1
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_4_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:9]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v8
-; GFX942-NEXT: v_mov_b32_e32 v1, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v16, v[8:9], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v0
+; GFX942-NEXT: v_mov_b32_e32 v9, v1
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -1936,51 +1855,51 @@ define void @v_shuffle_v3p0_v4p0__7_5_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v4p0__7_5_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:9]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v8
-; GFX900-NEXT: v_mov_b32_e32 v3, v9
-; GFX900-NEXT: global_store_dwordx2 v10, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v16, v[8:9], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v8, v2
+; GFX900-NEXT: v_mov_b32_e32 v9, v3
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v4p0__7_5_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:9]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v8
-; GFX90A-NEXT: v_mov_b32_e32 v3, v9
-; GFX90A-NEXT: global_store_dwordx2 v10, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v16, v[8:9], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v8, v2
+; GFX90A-NEXT: v_mov_b32_e32 v9, v3
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_5_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:9]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v8
-; GFX942-NEXT: v_mov_b32_e32 v3, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v16, v[8:9], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v2
+; GFX942-NEXT: v_mov_b32_e32 v9, v3
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -1994,51 +1913,51 @@ define void @v_shuffle_v3p0_v4p0__7_6_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v4p0__7_6_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:9]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v8
-; GFX900-NEXT: v_mov_b32_e32 v5, v9
-; GFX900-NEXT: global_store_dwordx2 v10, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v16, v[8:9], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v8, v4
+; GFX900-NEXT: v_mov_b32_e32 v9, v5
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v4p0__7_6_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:9]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v8
-; GFX90A-NEXT: v_mov_b32_e32 v5, v9
-; GFX90A-NEXT: global_store_dwordx2 v10, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v16, v[8:9], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v8, v4
+; GFX90A-NEXT: v_mov_b32_e32 v9, v5
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_6_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:9]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v4, v8
-; GFX942-NEXT: v_mov_b32_e32 v5, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v16, v[8:9], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v4
+; GFX942-NEXT: v_mov_b32_e32 v9, v5
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -2232,42 +2151,42 @@ define void @v_shuffle_v3p0_v4p0__3_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v4p0__3_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v2
+; GFX900-NEXT: v_mov_b32_e32 v9, v3
+; GFX900-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v4p0__3_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v2
+; GFX90A-NEXT: v_mov_b32_e32 v9, v3
+; GFX90A-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v4p0__3_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v2
+; GFX942-NEXT: v_mov_b32_e32 v9, v3
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -2439,16 +2358,14 @@ define void @v_shuffle_v3p0_v4p0__7_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v10
-; GFX900-NEXT: v_mov_b32_e32 v1, v11
-; GFX900-NEXT: global_store_dwordx2 v12, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v14, v[8:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2456,16 +2373,14 @@ define void @v_shuffle_v3p0_v4p0__7_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v10
-; GFX90A-NEXT: v_mov_b32_e32 v1, v11
-; GFX90A-NEXT: global_store_dwordx2 v12, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v14, v[8:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2473,16 +2388,14 @@ define void @v_shuffle_v3p0_v4p0__7_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v0, v10
-; GFX942-NEXT: v_mov_b32_e32 v1, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v14, v[8:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -2497,16 +2410,14 @@ define void @v_shuffle_v3p0_v4p0__7_u_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v10
-; GFX900-NEXT: v_mov_b32_e32 v1, v11
-; GFX900-NEXT: global_store_dwordx2 v12, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v14, v[8:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2514,16 +2425,14 @@ define void @v_shuffle_v3p0_v4p0__7_u_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v10
-; GFX90A-NEXT: v_mov_b32_e32 v1, v11
-; GFX90A-NEXT: global_store_dwordx2 v12, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v14, v[8:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2531,16 +2440,14 @@ define void @v_shuffle_v3p0_v4p0__7_u_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v0, v10
-; GFX942-NEXT: v_mov_b32_e32 v1, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v14, v[8:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -2554,57 +2461,45 @@ define void @v_shuffle_v3p0_v4p0__7_0_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v4p0__7_0_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v8, v6
-; GFX900-NEXT: v_mov_b32_e32 v9, v7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, v0
-; GFX900-NEXT: v_mov_b32_e32 v11, v1
-; GFX900-NEXT: global_store_dwordx2 v12, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v16, v[10:11], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v4p0__7_0_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v12, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v2, v10
-; GFX90A-NEXT: v_mov_b32_e32 v3, v11
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v16, v[10:11], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_0_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v10
-; GFX942-NEXT: v_mov_b32_e32 v3, v11
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v16, v[10:11], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -2620,15 +2515,15 @@ define void @v_shuffle_v3p0_v4p0__7_2_1(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx2 v14, v[8:9], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v8, v10
+; GFX900-NEXT: v_mov_b32_e32 v9, v11
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v14, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v2, v12
-; GFX900-NEXT: v_mov_b32_e32 v3, v13
-; GFX900-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2637,15 +2532,15 @@ define void @v_shuffle_v3p0_v4p0__7_2_1(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: global_store_dwordx2 v14, v[8:9], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v8, v10
+; GFX90A-NEXT: v_mov_b32_e32 v9, v11
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v14, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v2, v12
-; GFX90A-NEXT: v_mov_b32_e32 v3, v13
-; GFX90A-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2654,16 +2549,15 @@ define void @v_shuffle_v3p0_v4p0__7_2_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v14, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx2 v14, v[8:9], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v10
+; GFX942-NEXT: v_mov_b32_e32 v9, v11
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v12
-; GFX942-NEXT: v_mov_b32_e32 v3, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -2677,51 +2571,51 @@ define void @v_shuffle_v3p0_v4p0__7_3_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v4p0__7_3_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: global_store_dwordx2 v14, v[8:9], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v8, v12
+; GFX900-NEXT: v_mov_b32_e32 v9, v13
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v14
-; GFX900-NEXT: v_mov_b32_e32 v5, v15
-; GFX900-NEXT: global_store_dwordx2 v16, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v4p0__7_3_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
+; GFX90A-NEXT: global_store_dwordx2 v14, v[8:9], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v8, v12
+; GFX90A-NEXT: v_mov_b32_e32 v9, v13
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v14
-; GFX90A-NEXT: v_mov_b32_e32 v5, v15
-; GFX90A-NEXT: global_store_dwordx2 v16, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_3_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: global_store_dwordx2 v14, v[8:9], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v12
+; GFX942-NEXT: v_mov_b32_e32 v9, v13
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v16, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v4, v14
-; GFX942-NEXT: v_mov_b32_e32 v5, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -2735,58 +2629,52 @@ define void @v_shuffle_v3p0_v4p0__7_4_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v4p0__7_4_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v12, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v0, v10
-; GFX900-NEXT: v_mov_b32_e32 v1, v11
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v14, v[8:9], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v8, v0
+; GFX900-NEXT: v_mov_b32_e32 v9, v1
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v4p0__7_4_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v12, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v0, v10
-; GFX90A-NEXT: v_mov_b32_e32 v1, v11
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v14, v[8:9], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v8, v0
+; GFX90A-NEXT: v_mov_b32_e32 v9, v1
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_4_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx2 v14, v[8:9], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v10
-; GFX942-NEXT: v_mov_b32_e32 v1, v11
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v0
+; GFX942-NEXT: v_mov_b32_e32 v9, v1
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -2800,51 +2688,52 @@ define void @v_shuffle_v3p0_v4p0__7_5_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v4p0__7_5_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v10
-; GFX900-NEXT: v_mov_b32_e32 v5, v11
-; GFX900-NEXT: global_store_dwordx2 v12, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v14, v[8:9], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v8, v2
+; GFX900-NEXT: v_mov_b32_e32 v9, v3
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v4p0__7_5_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v10
-; GFX90A-NEXT: v_mov_b32_e32 v5, v11
-; GFX90A-NEXT: global_store_dwordx2 v12, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v14, v[8:9], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v8, v2
+; GFX90A-NEXT: v_mov_b32_e32 v9, v3
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_5_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: global_store_dwordx2 v14, v[8:9], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v4, v10
-; GFX942-NEXT: v_mov_b32_e32 v5, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v2
+; GFX942-NEXT: v_mov_b32_e32 v9, v3
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -2858,51 +2747,52 @@ define void @v_shuffle_v3p0_v4p0__7_6_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v4p0__7_6_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v10
-; GFX900-NEXT: v_mov_b32_e32 v7, v11
-; GFX900-NEXT: global_store_dwordx2 v12, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v14, v[8:9], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v8, v4
+; GFX900-NEXT: v_mov_b32_e32 v9, v5
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v4p0__7_6_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v10
-; GFX90A-NEXT: v_mov_b32_e32 v7, v11
-; GFX90A-NEXT: global_store_dwordx2 v12, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v14, v[8:9], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v8, v4
+; GFX90A-NEXT: v_mov_b32_e32 v9, v5
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_6_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: global_store_dwordx2 v14, v[8:9], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v6, v10
-; GFX942-NEXT: v_mov_b32_e32 v7, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v4
+; GFX942-NEXT: v_mov_b32_e32 v9, v5
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -3096,42 +2986,42 @@ define void @v_shuffle_v3p0_v4p0__3_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v4p0__3_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v4
+; GFX900-NEXT: v_mov_b32_e32 v9, v5
+; GFX900-NEXT: global_store_dwordx2 v10, v[4:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v4p0__3_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v4
+; GFX90A-NEXT: v_mov_b32_e32 v9, v5
+; GFX90A-NEXT: global_store_dwordx2 v10, v[4:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v4p0__3_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v10, v[4:5], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v4
+; GFX942-NEXT: v_mov_b32_e32 v9, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -3303,16 +3193,14 @@ define void @v_shuffle_v3p0_v4p0__7_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, v12
-; GFX900-NEXT: v_mov_b32_e32 v3, v13
-; GFX900-NEXT: global_store_dwordx2 v14, v[4:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v12, v[8:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3320,16 +3208,14 @@ define void @v_shuffle_v3p0_v4p0__7_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, v12
-; GFX90A-NEXT: v_mov_b32_e32 v3, v13
-; GFX90A-NEXT: global_store_dwordx2 v14, v[4:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v12, v[8:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3337,16 +3223,14 @@ define void @v_shuffle_v3p0_v4p0__7_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v12
-; GFX942-NEXT: v_mov_b32_e32 v3, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v12, v[8:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -3361,16 +3245,14 @@ define void @v_shuffle_v3p0_v4p0__7_u_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v12
-; GFX900-NEXT: v_mov_b32_e32 v1, v13
-; GFX900-NEXT: global_store_dwordx2 v14, v[4:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v14, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v12, v[8:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3378,16 +3260,14 @@ define void @v_shuffle_v3p0_v4p0__7_u_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v12
-; GFX90A-NEXT: v_mov_b32_e32 v1, v13
-; GFX90A-NEXT: global_store_dwordx2 v14, v[4:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v14, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v12, v[8:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3395,16 +3275,14 @@ define void @v_shuffle_v3p0_v4p0__7_u_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v0, v12
-; GFX942-NEXT: v_mov_b32_e32 v1, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v12, v[8:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -3418,57 +3296,45 @@ define void @v_shuffle_v3p0_v4p0__7_0_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v4p0__7_0_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v8, v6
-; GFX900-NEXT: v_mov_b32_e32 v9, v7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, v0
-; GFX900-NEXT: v_mov_b32_e32 v11, v1
-; GFX900-NEXT: global_store_dwordx2 v12, v[4:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v16, v[12:13], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v4p0__7_0_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v14, v[4:5], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v2, v12
-; GFX90A-NEXT: v_mov_b32_e32 v3, v13
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v16, v[12:13], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_0_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v2, v12
-; GFX942-NEXT: v_mov_b32_e32 v3, v13
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v16, v[12:13], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -3482,51 +3348,46 @@ define void @v_shuffle_v3p0_v4p0__7_1_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v4p0__7_1_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
-; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v12
-; GFX900-NEXT: v_mov_b32_e32 v1, v13
-; GFX900-NEXT: global_store_dwordx2 v14, v[4:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v14, v[0:3], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx2 v14, v[10:11], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v4p0__7_1_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
-; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v12
-; GFX90A-NEXT: v_mov_b32_e32 v1, v13
-; GFX90A-NEXT: global_store_dwordx2 v14, v[4:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v14, v[0:3], s[16:17]
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: global_store_dwordx2 v14, v[10:11], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_1_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
-; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v0, v12
-; GFX942-NEXT: v_mov_b32_e32 v1, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx2 v14, v[10:11], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -3540,51 +3401,51 @@ define void @v_shuffle_v3p0_v4p0__7_3_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v4p0__7_3_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx2 v12, v[8:9], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v8, v10
+; GFX900-NEXT: v_mov_b32_e32 v9, v11
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v16, v[4:5], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v4, v14
-; GFX900-NEXT: v_mov_b32_e32 v5, v15
-; GFX900-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v4p0__7_3_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: global_store_dwordx2 v12, v[8:9], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v8, v10
+; GFX90A-NEXT: v_mov_b32_e32 v9, v11
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v16, v[4:5], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v4, v14
-; GFX90A-NEXT: v_mov_b32_e32 v5, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_3_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx2 v12, v[8:9], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v10
+; GFX942-NEXT: v_mov_b32_e32 v9, v11
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v16, v[4:5], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v4, v14
-; GFX942-NEXT: v_mov_b32_e32 v5, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -3598,57 +3459,52 @@ define void @v_shuffle_v3p0_v4p0__7_4_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v4p0__7_4_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v12
-; GFX900-NEXT: v_mov_b32_e32 v1, v13
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: global_store_dwordx2 v14, v[4:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v14, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v12, v[8:9], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v8, v0
+; GFX900-NEXT: v_mov_b32_e32 v9, v1
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v4p0__7_4_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v12
-; GFX90A-NEXT: v_mov_b32_e32 v1, v13
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: global_store_dwordx2 v14, v[4:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v14, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v12, v[8:9], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v8, v0
+; GFX90A-NEXT: v_mov_b32_e32 v9, v1
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_4_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: global_store_dwordx2 v12, v[8:9], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v0, v12
-; GFX942-NEXT: v_mov_b32_e32 v1, v13
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: global_store_dwordx4 v14, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v0
+; GFX942-NEXT: v_mov_b32_e32 v9, v1
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -3662,51 +3518,52 @@ define void @v_shuffle_v3p0_v4p0__7_5_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v4p0__7_5_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v12
-; GFX900-NEXT: v_mov_b32_e32 v7, v13
-; GFX900-NEXT: global_store_dwordx2 v14, v[4:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v12, v[8:9], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v8, v2
+; GFX900-NEXT: v_mov_b32_e32 v9, v3
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v4p0__7_5_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v12
-; GFX90A-NEXT: v_mov_b32_e32 v7, v13
-; GFX90A-NEXT: global_store_dwordx2 v14, v[4:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v12, v[8:9], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v8, v2
+; GFX90A-NEXT: v_mov_b32_e32 v9, v3
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_5_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: global_store_dwordx2 v12, v[8:9], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v6, v12
-; GFX942-NEXT: v_mov_b32_e32 v7, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v2
+; GFX942-NEXT: v_mov_b32_e32 v9, v3
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -3720,51 +3577,52 @@ define void @v_shuffle_v3p0_v4p0__7_6_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v4p0__7_6_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
-; GFX900-NEXT: v_mov_b32_e32 v8, v12
-; GFX900-NEXT: v_mov_b32_e32 v9, v13
-; GFX900-NEXT: global_store_dwordx2 v14, v[4:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v12, v[8:9], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v8, v4
+; GFX900-NEXT: v_mov_b32_e32 v9, v5
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v4p0__7_6_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v12
-; GFX90A-NEXT: v_mov_b32_e32 v9, v13
-; GFX90A-NEXT: global_store_dwordx2 v14, v[4:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v12, v[8:9], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v8, v4
+; GFX90A-NEXT: v_mov_b32_e32 v9, v5
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_6_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: global_store_dwordx2 v12, v[8:9], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v8, v12
-; GFX942-NEXT: v_mov_b32_e32 v9, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v4
+; GFX942-NEXT: v_mov_b32_e32 v9, v5
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -3958,42 +3816,42 @@ define void @v_shuffle_v3p0_v4p0__3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v4p0__3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[6:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx2 v10, v[6:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v4p0__3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx2 v8, v[6:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx2 v10, v[6:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v4p0__3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v10, v[6:7], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -4165,16 +4023,14 @@ define void @v_shuffle_v3p0_v4p0__7_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v14
-; GFX900-NEXT: v_mov_b32_e32 v5, v15
-; GFX900-NEXT: global_store_dwordx2 v16, v[6:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v10, v[8:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4182,16 +4038,14 @@ define void @v_shuffle_v3p0_v4p0__7_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v14
-; GFX90A-NEXT: v_mov_b32_e32 v5, v15
-; GFX90A-NEXT: global_store_dwordx2 v16, v[6:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v10, v[8:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4199,16 +4053,14 @@ define void @v_shuffle_v3p0_v4p0__7_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v4, v14
-; GFX942-NEXT: v_mov_b32_e32 v5, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v10, v[8:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -4223,16 +4075,14 @@ define void @v_shuffle_v3p0_v4p0__7_u_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v14
-; GFX900-NEXT: v_mov_b32_e32 v1, v15
-; GFX900-NEXT: global_store_dwordx2 v16, v[6:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v10, v[8:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4240,16 +4090,14 @@ define void @v_shuffle_v3p0_v4p0__7_u_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v14
-; GFX90A-NEXT: v_mov_b32_e32 v1, v15
-; GFX90A-NEXT: global_store_dwordx2 v16, v[6:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v10, v[8:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4257,16 +4105,14 @@ define void @v_shuffle_v3p0_v4p0__7_u_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v0, v14
-; GFX942-NEXT: v_mov_b32_e32 v1, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v10, v[8:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -4280,57 +4126,45 @@ define void @v_shuffle_v3p0_v4p0__7_0_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v4p0__7_0_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v8, v6
-; GFX900-NEXT: v_mov_b32_e32 v9, v7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, v0
-; GFX900-NEXT: v_mov_b32_e32 v11, v1
-; GFX900-NEXT: global_store_dwordx2 v12, v[6:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v16, v[14:15], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v4p0__7_0_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
-; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, v14
-; GFX90A-NEXT: v_mov_b32_e32 v3, v15
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx2 v16, v[6:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[2:5], s[16:17]
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: global_store_dwordx2 v16, v[14:15], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_0_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
-; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v2, v14
-; GFX942-NEXT: v_mov_b32_e32 v3, v15
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx2 v16, v[14:15], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -4344,51 +4178,46 @@ define void @v_shuffle_v3p0_v4p0__7_1_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v4p0__7_1_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v14
-; GFX900-NEXT: v_mov_b32_e32 v1, v15
-; GFX900-NEXT: global_store_dwordx2 v16, v[6:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v14, v[12:13], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v4p0__7_1_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v14
-; GFX90A-NEXT: v_mov_b32_e32 v1, v15
-; GFX90A-NEXT: global_store_dwordx2 v16, v[6:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v14, v[12:13], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_1_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v0, v14
-; GFX942-NEXT: v_mov_b32_e32 v1, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v14, v[12:13], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -4402,51 +4231,46 @@ define void @v_shuffle_v3p0_v4p0__7_2_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v4p0__7_2_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, v14
-; GFX900-NEXT: v_mov_b32_e32 v3, v15
-; GFX900-NEXT: global_store_dwordx2 v16, v[6:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v12, v[10:11], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v4p0__7_2_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, v14
-; GFX90A-NEXT: v_mov_b32_e32 v3, v15
-; GFX90A-NEXT: global_store_dwordx2 v16, v[6:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v12, v[10:11], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_2_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v14
-; GFX942-NEXT: v_mov_b32_e32 v3, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v12, v[10:11], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -4460,57 +4284,52 @@ define void @v_shuffle_v3p0_v4p0__7_4_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v4p0__7_4_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v14
-; GFX900-NEXT: v_mov_b32_e32 v1, v15
-; GFX900-NEXT: v_mov_b32_e32 v2, v8
-; GFX900-NEXT: v_mov_b32_e32 v3, v9
-; GFX900-NEXT: global_store_dwordx2 v16, v[6:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v10, v[8:9], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v8, v0
+; GFX900-NEXT: v_mov_b32_e32 v9, v1
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v4p0__7_4_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v14
-; GFX90A-NEXT: v_mov_b32_e32 v1, v15
-; GFX90A-NEXT: v_mov_b32_e32 v2, v8
-; GFX90A-NEXT: v_mov_b32_e32 v3, v9
-; GFX90A-NEXT: global_store_dwordx2 v16, v[6:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v10, v[8:9], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v8, v0
+; GFX90A-NEXT: v_mov_b32_e32 v9, v1
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_4_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: global_store_dwordx2 v10, v[8:9], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v0, v14
-; GFX942-NEXT: v_mov_b32_e32 v1, v15
-; GFX942-NEXT: v_mov_b32_e32 v2, v8
-; GFX942-NEXT: v_mov_b32_e32 v3, v9
-; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v0
+; GFX942-NEXT: v_mov_b32_e32 v9, v1
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -4524,51 +4343,52 @@ define void @v_shuffle_v3p0_v4p0__7_5_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v4p0__7_5_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
-; GFX900-NEXT: v_mov_b32_e32 v8, v14
-; GFX900-NEXT: v_mov_b32_e32 v9, v15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v16, v[6:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v10, v[8:9], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v8, v2
+; GFX900-NEXT: v_mov_b32_e32 v9, v3
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v4p0__7_5_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v14
-; GFX90A-NEXT: v_mov_b32_e32 v9, v15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v16, v[6:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v10, v[8:9], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v8, v2
+; GFX90A-NEXT: v_mov_b32_e32 v9, v3
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_5_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v14
-; GFX942-NEXT: v_mov_b32_e32 v9, v15
+; GFX942-NEXT: global_store_dwordx2 v10, v[8:9], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v2
+; GFX942-NEXT: v_mov_b32_e32 v9, v3
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -4582,51 +4402,52 @@ define void @v_shuffle_v3p0_v4p0__7_6_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v4p0__7_6_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
-; GFX900-NEXT: v_mov_b32_e32 v10, v14
-; GFX900-NEXT: v_mov_b32_e32 v11, v15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v16, v[6:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[10:13], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v10, v[8:9], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v8, v4
+; GFX900-NEXT: v_mov_b32_e32 v9, v5
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v4p0__7_6_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v10, v14
-; GFX90A-NEXT: v_mov_b32_e32 v11, v15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v16, v[6:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[10:13], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v10, v[8:9], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v8, v4
+; GFX90A-NEXT: v_mov_b32_e32 v9, v5
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_6_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v14
-; GFX942-NEXT: v_mov_b32_e32 v11, v15
+; GFX942-NEXT: global_store_dwordx2 v10, v[8:9], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v4
+; GFX942-NEXT: v_mov_b32_e32 v9, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -4771,39 +4592,33 @@ define void @v_shuffle_v3p0_v4p0__3_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v4p0__3_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v4p0__3_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v4p0__3_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -4925,48 +4740,42 @@ define void @v_shuffle_v3p0_v4p0__7_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v4p0__7_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v0
+; GFX900-NEXT: v_mov_b32_e32 v9, v1
+; GFX900-NEXT: global_store_dwordx2 v10, v[0:1], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v4p0__7_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v0
+; GFX90A-NEXT: v_mov_b32_e32 v9, v1
+; GFX90A-NEXT: global_store_dwordx2 v10, v[0:1], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v0
+; GFX942-NEXT: v_mov_b32_e32 v9, v1
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -4985,9 +4794,7 @@ define void @v_shuffle_v3p0_v4p0__7_u_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4999,9 +4806,7 @@ define void @v_shuffle_v3p0_v4p0__7_u_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5013,9 +4818,7 @@ define void @v_shuffle_v3p0_v4p0__7_u_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -5029,57 +4832,45 @@ define void @v_shuffle_v3p0_v4p0__7_0_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v4p0__7_0_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v12, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v8, v6
-; GFX900-NEXT: v_mov_b32_e32 v9, v7
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, v0
-; GFX900-NEXT: v_mov_b32_e32 v11, v1
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v16, v[0:1], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v4p0__7_0_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:9]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v2, v8
-; GFX90A-NEXT: v_mov_b32_e32 v3, v9
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v16, v[0:1], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_0_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:9]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v8
-; GFX942-NEXT: v_mov_b32_e32 v3, v9
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v16, v[0:1], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -5094,16 +4885,14 @@ define void @v_shuffle_v3p0_v4p0__7_1_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v10
-; GFX900-NEXT: v_mov_b32_e32 v1, v11
-; GFX900-NEXT: global_store_dwordx2 v12, v[4:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v14, v[0:1], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5111,16 +4900,14 @@ define void @v_shuffle_v3p0_v4p0__7_1_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v10
-; GFX90A-NEXT: v_mov_b32_e32 v1, v11
-; GFX90A-NEXT: global_store_dwordx2 v12, v[4:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v14, v[0:1], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5128,16 +4915,14 @@ define void @v_shuffle_v3p0_v4p0__7_1_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v12, v[4:5], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v0, v10
-; GFX942-NEXT: v_mov_b32_e32 v1, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v14, v[0:1], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -5152,16 +4937,14 @@ define void @v_shuffle_v3p0_v4p0__7_2_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, v12
-; GFX900-NEXT: v_mov_b32_e32 v3, v13
-; GFX900-NEXT: global_store_dwordx2 v14, v[6:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v12, v[0:1], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5169,16 +4952,14 @@ define void @v_shuffle_v3p0_v4p0__7_2_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, v12
-; GFX90A-NEXT: v_mov_b32_e32 v3, v13
-; GFX90A-NEXT: global_store_dwordx2 v14, v[6:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v12, v[0:1], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5186,16 +4967,14 @@ define void @v_shuffle_v3p0_v4p0__7_2_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v14, v[6:7], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v12
-; GFX942-NEXT: v_mov_b32_e32 v3, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v12, v[0:1], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -5210,16 +4989,14 @@ define void @v_shuffle_v3p0_v4p0__7_3_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v14
-; GFX900-NEXT: v_mov_b32_e32 v5, v15
-; GFX900-NEXT: global_store_dwordx2 v16, v[8:9], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v10, v[0:1], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5227,16 +5004,14 @@ define void @v_shuffle_v3p0_v4p0__7_3_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v14
-; GFX90A-NEXT: v_mov_b32_e32 v5, v15
-; GFX90A-NEXT: global_store_dwordx2 v16, v[8:9], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v10, v[0:1], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5244,16 +5019,14 @@ define void @v_shuffle_v3p0_v4p0__7_3_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v16, v[8:9], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v4, v14
-; GFX942-NEXT: v_mov_b32_e32 v5, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -5267,42 +5040,42 @@ define void @v_shuffle_v3p0_v4p0__7_5_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v4p0__7_5_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v2
+; GFX900-NEXT: v_mov_b32_e32 v9, v3
+; GFX900-NEXT: global_store_dwordx2 v10, v[0:1], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v4p0__7_5_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v2
+; GFX90A-NEXT: v_mov_b32_e32 v9, v3
+; GFX90A-NEXT: global_store_dwordx2 v10, v[0:1], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_5_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v2
+; GFX942-NEXT: v_mov_b32_e32 v9, v3
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -5316,42 +5089,42 @@ define void @v_shuffle_v3p0_v4p0__7_6_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v4p0__7_6_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v4
+; GFX900-NEXT: v_mov_b32_e32 v9, v5
+; GFX900-NEXT: global_store_dwordx2 v10, v[0:1], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v4p0__7_6_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: global_store_dwordx2 v8, v[0:1], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v4
+; GFX90A-NEXT: v_mov_b32_e32 v9, v5
+; GFX90A-NEXT: global_store_dwordx2 v10, v[0:1], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_6_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v4
+; GFX942-NEXT: v_mov_b32_e32 v9, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -5586,13 +5359,13 @@ define void @v_shuffle_v3p0_v4p0__3_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v10
+; GFX900-NEXT: v_mov_b32_e32 v9, v11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, v6
-; GFX900-NEXT: v_mov_b32_e32 v9, v7
; GFX900-NEXT: global_store_dwordx2 v16, v[10:11], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5603,13 +5376,13 @@ define void @v_shuffle_v3p0_v4p0__3_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v16, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v10
+; GFX90A-NEXT: v_mov_b32_e32 v9, v11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, v6
-; GFX90A-NEXT: v_mov_b32_e32 v9, v7
; GFX90A-NEXT: global_store_dwordx2 v16, v[10:11], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5620,13 +5393,13 @@ define void @v_shuffle_v3p0_v4p0__3_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v10
+; GFX942-NEXT: v_mov_b32_e32 v9, v11
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: global_store_dwordx2 v16, v[10:11], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v8, v6
-; GFX942-NEXT: v_mov_b32_e32 v9, v7
-; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -5781,42 +5554,42 @@ define void @v_shuffle_v3p0_v4p0__7_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v4p0__7_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v2
+; GFX900-NEXT: v_mov_b32_e32 v9, v3
+; GFX900-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v4p0__7_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v2
+; GFX90A-NEXT: v_mov_b32_e32 v9, v3
+; GFX90A-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v2
+; GFX942-NEXT: v_mov_b32_e32 v9, v3
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -5830,42 +5603,36 @@ define void @v_shuffle_v3p0_v4p0__7_u_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v4p0__7_u_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
; GFX900-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v4p0__7_u_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
; GFX90A-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_u_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -5879,57 +5646,45 @@ define void @v_shuffle_v3p0_v4p0__7_0_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v4p0__7_0_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v12, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v8, v6
-; GFX900-NEXT: v_mov_b32_e32 v9, v7
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, v0
-; GFX900-NEXT: v_mov_b32_e32 v11, v1
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v16, v[2:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v4p0__7_0_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:9]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v10, v[4:5], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v2, v8
-; GFX90A-NEXT: v_mov_b32_e32 v3, v9
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v16, v[2:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_0_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:9]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v10, v[4:5], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v8
-; GFX942-NEXT: v_mov_b32_e32 v3, v9
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v16, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -5944,16 +5699,14 @@ define void @v_shuffle_v3p0_v4p0__7_1_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v10
-; GFX900-NEXT: v_mov_b32_e32 v1, v11
-; GFX900-NEXT: global_store_dwordx2 v12, v[6:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v14, v[2:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5961,16 +5714,14 @@ define void @v_shuffle_v3p0_v4p0__7_1_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v10
-; GFX90A-NEXT: v_mov_b32_e32 v1, v11
-; GFX90A-NEXT: global_store_dwordx2 v12, v[6:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v14, v[2:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5978,16 +5729,14 @@ define void @v_shuffle_v3p0_v4p0__7_1_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v12, v[6:7], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v0, v10
-; GFX942-NEXT: v_mov_b32_e32 v1, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v14, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -6002,16 +5751,14 @@ define void @v_shuffle_v3p0_v4p0__7_2_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, v12
-; GFX900-NEXT: v_mov_b32_e32 v3, v13
-; GFX900-NEXT: global_store_dwordx2 v14, v[8:9], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v12, v[2:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6019,16 +5766,14 @@ define void @v_shuffle_v3p0_v4p0__7_2_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, v12
-; GFX90A-NEXT: v_mov_b32_e32 v3, v13
-; GFX90A-NEXT: global_store_dwordx2 v14, v[8:9], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v12, v[2:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6036,16 +5781,14 @@ define void @v_shuffle_v3p0_v4p0__7_2_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v14, v[8:9], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v12
-; GFX942-NEXT: v_mov_b32_e32 v3, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v12, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -6060,16 +5803,14 @@ define void @v_shuffle_v3p0_v4p0__7_3_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v14
-; GFX900-NEXT: v_mov_b32_e32 v5, v15
-; GFX900-NEXT: global_store_dwordx2 v16, v[10:11], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6077,16 +5818,14 @@ define void @v_shuffle_v3p0_v4p0__7_3_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v14
-; GFX90A-NEXT: v_mov_b32_e32 v5, v15
-; GFX90A-NEXT: global_store_dwordx2 v16, v[10:11], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6094,16 +5833,14 @@ define void @v_shuffle_v3p0_v4p0__7_3_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v16, v[10:11], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v4, v14
-; GFX942-NEXT: v_mov_b32_e32 v5, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -6117,48 +5854,42 @@ define void @v_shuffle_v3p0_v4p0__7_4_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v4p0__7_4_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v0
+; GFX900-NEXT: v_mov_b32_e32 v9, v1
+; GFX900-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v4p0__7_4_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v0
+; GFX90A-NEXT: v_mov_b32_e32 v9, v1
+; GFX90A-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_4_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v0
+; GFX942-NEXT: v_mov_b32_e32 v9, v1
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -6172,42 +5903,42 @@ define void @v_shuffle_v3p0_v4p0__7_6_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v4p0__7_6_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v4
+; GFX900-NEXT: v_mov_b32_e32 v9, v5
+; GFX900-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v4p0__7_6_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v8, v[2:3], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v4
+; GFX90A-NEXT: v_mov_b32_e32 v9, v5
+; GFX90A-NEXT: global_store_dwordx2 v10, v[2:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_6_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v8, v[2:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v10, v[2:3], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v4
+; GFX942-NEXT: v_mov_b32_e32 v9, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -6442,13 +6173,13 @@ define void @v_shuffle_v3p0_v4p0__3_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v12
+; GFX900-NEXT: v_mov_b32_e32 v9, v13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, v6
-; GFX900-NEXT: v_mov_b32_e32 v11, v7
; GFX900-NEXT: global_store_dwordx2 v16, v[12:13], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[10:13], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6459,13 +6190,13 @@ define void @v_shuffle_v3p0_v4p0__3_6_6(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v16, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v12
+; GFX90A-NEXT: v_mov_b32_e32 v9, v13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, v6
-; GFX90A-NEXT: v_mov_b32_e32 v11, v7
; GFX90A-NEXT: global_store_dwordx2 v16, v[12:13], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[10:13], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6476,13 +6207,13 @@ define void @v_shuffle_v3p0_v4p0__3_6_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v12
+; GFX942-NEXT: v_mov_b32_e32 v9, v13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: global_store_dwordx2 v16, v[12:13], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v10, v6
-; GFX942-NEXT: v_mov_b32_e32 v11, v7
-; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -6637,42 +6368,42 @@ define void @v_shuffle_v3p0_v4p0__7_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v4p0__7_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v4
+; GFX900-NEXT: v_mov_b32_e32 v9, v5
+; GFX900-NEXT: global_store_dwordx2 v10, v[4:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v4p0__7_6_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v4
+; GFX90A-NEXT: v_mov_b32_e32 v9, v5
+; GFX90A-NEXT: global_store_dwordx2 v10, v[4:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v10, v[4:5], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v4
+; GFX942-NEXT: v_mov_b32_e32 v9, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -6686,42 +6417,36 @@ define void @v_shuffle_v3p0_v4p0__7_u_6(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v4p0__7_u_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
; GFX900-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v4p0__7_u_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
; GFX90A-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_u_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -6735,57 +6460,45 @@ define void @v_shuffle_v3p0_v4p0__7_0_6(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v4p0__7_0_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v12, v[4:5], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v8, v6
-; GFX900-NEXT: v_mov_b32_e32 v9, v7
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, v0
-; GFX900-NEXT: v_mov_b32_e32 v11, v1
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v16, v[4:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v4p0__7_0_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:9]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v8
-; GFX90A-NEXT: v_mov_b32_e32 v3, v9
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx2 v10, v[6:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v16, v[4:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_0_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:9]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v10, v[6:7], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v8
-; GFX942-NEXT: v_mov_b32_e32 v3, v9
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v16, v[4:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -6800,16 +6513,14 @@ define void @v_shuffle_v3p0_v4p0__7_1_6(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v10
-; GFX900-NEXT: v_mov_b32_e32 v1, v11
-; GFX900-NEXT: global_store_dwordx2 v12, v[8:9], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v14, v[4:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6817,16 +6528,14 @@ define void @v_shuffle_v3p0_v4p0__7_1_6(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v10
-; GFX90A-NEXT: v_mov_b32_e32 v1, v11
-; GFX90A-NEXT: global_store_dwordx2 v12, v[8:9], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v14, v[4:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6834,16 +6543,14 @@ define void @v_shuffle_v3p0_v4p0__7_1_6(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v12, v[8:9], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v0, v10
-; GFX942-NEXT: v_mov_b32_e32 v1, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v14, v[4:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -6858,16 +6565,14 @@ define void @v_shuffle_v3p0_v4p0__7_2_6(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, v12
-; GFX900-NEXT: v_mov_b32_e32 v3, v13
-; GFX900-NEXT: global_store_dwordx2 v14, v[10:11], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v12, v[4:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6875,16 +6580,14 @@ define void @v_shuffle_v3p0_v4p0__7_2_6(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, v12
-; GFX90A-NEXT: v_mov_b32_e32 v3, v13
-; GFX90A-NEXT: global_store_dwordx2 v14, v[10:11], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v12, v[4:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6892,16 +6595,14 @@ define void @v_shuffle_v3p0_v4p0__7_2_6(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v14, v[10:11], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v12
-; GFX942-NEXT: v_mov_b32_e32 v3, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v12, v[4:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -6916,16 +6617,14 @@ define void @v_shuffle_v3p0_v4p0__7_3_6(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v14
-; GFX900-NEXT: v_mov_b32_e32 v5, v15
-; GFX900-NEXT: global_store_dwordx2 v16, v[12:13], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v10, v[4:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6933,16 +6632,14 @@ define void @v_shuffle_v3p0_v4p0__7_3_6(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v14
-; GFX90A-NEXT: v_mov_b32_e32 v5, v15
-; GFX90A-NEXT: global_store_dwordx2 v16, v[12:13], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v10, v[4:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6950,16 +6647,14 @@ define void @v_shuffle_v3p0_v4p0__7_3_6(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v16, v[12:13], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v4, v14
-; GFX942-NEXT: v_mov_b32_e32 v5, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v10, v[4:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -6973,48 +6668,42 @@ define void @v_shuffle_v3p0_v4p0__7_4_6(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v4p0__7_4_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v0
+; GFX900-NEXT: v_mov_b32_e32 v9, v1
+; GFX900-NEXT: global_store_dwordx2 v10, v[4:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v4p0__7_4_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v0
+; GFX90A-NEXT: v_mov_b32_e32 v9, v1
+; GFX90A-NEXT: global_store_dwordx2 v10, v[4:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_4_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v10, v[4:5], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v0
+; GFX942-NEXT: v_mov_b32_e32 v9, v1
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -7028,42 +6717,42 @@ define void @v_shuffle_v3p0_v4p0__7_5_6(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v4p0__7_5_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v2
+; GFX900-NEXT: v_mov_b32_e32 v9, v3
+; GFX900-NEXT: global_store_dwordx2 v10, v[4:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v4p0__7_5_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx2 v8, v[4:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v2
+; GFX90A-NEXT: v_mov_b32_e32 v9, v3
+; GFX90A-NEXT: global_store_dwordx2 v10, v[4:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_5_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx2 v8, v[4:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v10, v[4:5], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v2
+; GFX942-NEXT: v_mov_b32_e32 v9, v3
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -7298,13 +6987,13 @@ define void @v_shuffle_v3p0_v4p0__3_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v14
+; GFX900-NEXT: v_mov_b32_e32 v9, v15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, v6
-; GFX900-NEXT: v_mov_b32_e32 v13, v7
; GFX900-NEXT: global_store_dwordx2 v16, v[14:15], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7315,13 +7004,13 @@ define void @v_shuffle_v3p0_v4p0__3_7_7(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v16, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v14
+; GFX90A-NEXT: v_mov_b32_e32 v9, v15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, v6
-; GFX90A-NEXT: v_mov_b32_e32 v13, v7
; GFX90A-NEXT: global_store_dwordx2 v16, v[14:15], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7332,13 +7021,13 @@ define void @v_shuffle_v3p0_v4p0__3_7_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v14
+; GFX942-NEXT: v_mov_b32_e32 v9, v15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: global_store_dwordx2 v16, v[14:15], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v12, v6
-; GFX942-NEXT: v_mov_b32_e32 v13, v7
-; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -7493,42 +7182,36 @@ define void @v_shuffle_v3p0_v4p0__7_u_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v4p0__7_u_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
; GFX900-NEXT: global_store_dwordx2 v8, v[6:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v4p0__7_u_7:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
; GFX90A-NEXT: global_store_dwordx2 v8, v[6:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_u_7:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -7542,57 +7225,45 @@ define void @v_shuffle_v3p0_v4p0__7_0_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v4p0__7_0_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx2 v12, v[6:7], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v8, v6
-; GFX900-NEXT: v_mov_b32_e32 v9, v7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, v0
-; GFX900-NEXT: v_mov_b32_e32 v11, v1
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v16, v[6:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v4p0__7_0_7:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:9]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v8
-; GFX90A-NEXT: v_mov_b32_e32 v3, v9
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx2 v10, v[8:9], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v16, v[6:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_0_7:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:9]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v10, v[8:9], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v8
-; GFX942-NEXT: v_mov_b32_e32 v3, v9
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v16, v[6:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -7607,16 +7278,14 @@ define void @v_shuffle_v3p0_v4p0__7_1_7(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v10
-; GFX900-NEXT: v_mov_b32_e32 v1, v11
-; GFX900-NEXT: global_store_dwordx2 v12, v[10:11], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v14, v[6:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7624,16 +7293,14 @@ define void @v_shuffle_v3p0_v4p0__7_1_7(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v10
-; GFX90A-NEXT: v_mov_b32_e32 v1, v11
-; GFX90A-NEXT: global_store_dwordx2 v12, v[10:11], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v14, v[6:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7641,16 +7308,14 @@ define void @v_shuffle_v3p0_v4p0__7_1_7(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v12, v[10:11], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v0, v10
-; GFX942-NEXT: v_mov_b32_e32 v1, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v14, v[6:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -7665,16 +7330,14 @@ define void @v_shuffle_v3p0_v4p0__7_2_7(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, v12
-; GFX900-NEXT: v_mov_b32_e32 v3, v13
-; GFX900-NEXT: global_store_dwordx2 v14, v[12:13], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v12, v[6:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7682,16 +7345,14 @@ define void @v_shuffle_v3p0_v4p0__7_2_7(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, v12
-; GFX90A-NEXT: v_mov_b32_e32 v3, v13
-; GFX90A-NEXT: global_store_dwordx2 v14, v[12:13], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v12, v[6:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7699,16 +7360,14 @@ define void @v_shuffle_v3p0_v4p0__7_2_7(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v14, v[12:13], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v12
-; GFX942-NEXT: v_mov_b32_e32 v3, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v12, v[6:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -7723,16 +7382,14 @@ define void @v_shuffle_v3p0_v4p0__7_3_7(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v14
-; GFX900-NEXT: v_mov_b32_e32 v5, v15
-; GFX900-NEXT: global_store_dwordx2 v16, v[14:15], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx2 v10, v[6:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7740,16 +7397,14 @@ define void @v_shuffle_v3p0_v4p0__7_3_7(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v14
-; GFX90A-NEXT: v_mov_b32_e32 v5, v15
-; GFX90A-NEXT: global_store_dwordx2 v16, v[14:15], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx2 v10, v[6:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7757,16 +7412,14 @@ define void @v_shuffle_v3p0_v4p0__7_3_7(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx2 v16, v[14:15], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v4, v14
-; GFX942-NEXT: v_mov_b32_e32 v5, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v10, v[6:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -7780,48 +7433,42 @@ define void @v_shuffle_v3p0_v4p0__7_4_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v4p0__7_4_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx2 v8, v[6:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v0
+; GFX900-NEXT: v_mov_b32_e32 v9, v1
+; GFX900-NEXT: global_store_dwordx2 v10, v[6:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v4p0__7_4_7:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx2 v8, v[6:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v0
+; GFX90A-NEXT: v_mov_b32_e32 v9, v1
+; GFX90A-NEXT: global_store_dwordx2 v10, v[6:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_4_7:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v10, v[6:7], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v0
+; GFX942-NEXT: v_mov_b32_e32 v9, v1
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -7835,42 +7482,42 @@ define void @v_shuffle_v3p0_v4p0__7_5_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v4p0__7_5_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[6:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v2
+; GFX900-NEXT: v_mov_b32_e32 v9, v3
+; GFX900-NEXT: global_store_dwordx2 v10, v[6:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v4p0__7_5_7:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx2 v8, v[6:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v2
+; GFX90A-NEXT: v_mov_b32_e32 v9, v3
+; GFX90A-NEXT: global_store_dwordx2 v10, v[6:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_5_7:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v10, v[6:7], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v2
+; GFX942-NEXT: v_mov_b32_e32 v9, v3
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -7884,42 +7531,42 @@ define void @v_shuffle_v3p0_v4p0__7_6_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p0_v4p0__7_6_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: global_store_dwordx2 v8, v[6:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v4
+; GFX900-NEXT: v_mov_b32_e32 v9, v5
+; GFX900-NEXT: global_store_dwordx2 v10, v[6:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p0_v4p0__7_6_7:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: global_store_dwordx2 v8, v[6:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v4
+; GFX90A-NEXT: v_mov_b32_e32 v9, v5
+; GFX90A-NEXT: global_store_dwordx2 v10, v[6:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_6_7:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: global_store_dwordx2 v8, v[6:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx2 v10, v[6:7], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v4
+; GFX942-NEXT: v_mov_b32_e32 v9, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -8055,10 +7702,9 @@ define void @s_shuffle_v3p0_v4p0__2_u_u() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8201,10 +7847,9 @@ define void @s_shuffle_v3p0_v4p0__6_u_u() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8477,15 +8122,14 @@ define void @s_shuffle_v3p0_v4p0__7_3_u() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8622,134 +8266,62 @@ define void @s_shuffle_v3p0_v4p0__7_6_u() {
}
define void @s_shuffle_v3p0_v4p0__7_7_u() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_7_u:
+; GFX9-LABEL: s_shuffle_v3p0_v4p0__7_7_u:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 7, i32 poison>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__7_7_0() {
+; GFX9-LABEL: s_shuffle_v3p0_v4p0__7_7_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 7, i32 0>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__7_7_1() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_7_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_7_u:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_7_u:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 7, i32 poison>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__7_7_0() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_7_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_7_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_7_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 7, i32 0>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__7_7_1() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_7_1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -8759,17 +8331,15 @@ define void @s_shuffle_v3p0_v4p0__7_7_1() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -8838,13 +8408,12 @@ define void @s_shuffle_v3p0_v4p0__7_7_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b32 s8, s10
+; GFX942-NEXT: s_mov_b32 s9, s11
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8899,15 +8468,13 @@ define void @s_shuffle_v3p0_v4p0__7_7_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s12, s14
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b32 s8, s10
+; GFX942-NEXT: s_mov_b32 s9, s11
+; GFX942-NEXT: s_mov_b32 s13, s15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8920,52 +8487,20 @@ define void @s_shuffle_v3p0_v4p0__7_7_3() {
}
define void @s_shuffle_v3p0_v4p0__7_7_4() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_7_4:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_7_4:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_7_4:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v3p0_v4p0__7_7_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s12, s4
+; GFX9-NEXT: s_mov_b32 s13, s5
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 7, i32 4>
@@ -8974,1103 +8509,58 @@ define void @s_shuffle_v3p0_v4p0__7_7_4() {
}
define void @s_shuffle_v3p0_v4p0__7_7_5() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_7_5:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_7_5:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_7_5:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 7, i32 5>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__7_7_6() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__7_7_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 7, i32 6>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__7_7_7() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_7_7:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_7_7:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_7_7:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 7, i32 7>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__u_0_0() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__u_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__u_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__u_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 poison, i32 0, i32 0>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__0_0_0() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__0_0_0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s8
-; GFX9-NEXT: s_mov_b32 s11, s9
-; GFX9-NEXT: s_mov_b32 s12, s8
-; GFX9-NEXT: s_mov_b32 s13, s9
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> zeroinitializer
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__1_0_0() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__1_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__1_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__1_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 1, i32 0, i32 0>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__2_0_0() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__2_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__2_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__2_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 2, i32 0, i32 0>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__3_0_0() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__3_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__3_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__3_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 3, i32 0, i32 0>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__4_0_0() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__4_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__4_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__4_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 4, i32 0, i32 0>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__5_0_0() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__5_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__5_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__5_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 5, i32 0, i32 0>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__6_0_0() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__6_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__6_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__6_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 6, i32 0, i32 0>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__7_0_0() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 0, i32 0>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__7_u_0() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_u_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_u_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_u_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 poison, i32 0>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__7_1_0() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_1_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s6
-; GFX900-NEXT: s_mov_b32 s11, s7
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_1_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s6
-; GFX90A-NEXT: s_mov_b32 s11, s7
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_1_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 1, i32 0>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__7_2_0() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_2_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_2_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_2_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 2, i32 0>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__7_3_0() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_3_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_3_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_3_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 3, i32 0>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__7_4_0() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_4_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_4_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_4_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 4, i32 0>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__7_5_0() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_5_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_5_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_5_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 5, i32 0>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__7_6_0() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_6_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_6_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_6_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v3p0_v4p0__7_7_5:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s12, s6
+; GFX9-NEXT: s_mov_b32 s13, s7
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 6, i32 0>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 7, i32 5>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__u_1_1() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__u_1_1:
+define void @s_shuffle_v3p0_v4p0__7_7_6() {
+; GFX9-LABEL: s_shuffle_v3p0_v4p0__7_7_6:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; def s[8:15]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: s_mov_b32 s8, s14
+; GFX9-NEXT: s_mov_b32 s9, s15
+; GFX9-NEXT: s_mov_b32 s10, s14
+; GFX9-NEXT: s_mov_b32 s11, s15
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:13]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 poison, i32 1, i32 1>
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 7, i32 6>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__0_1_1() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__0_1_1:
+define void @s_shuffle_v3p0_v4p0__7_7_7() {
+; GFX9-LABEL: s_shuffle_v3p0_v4p0__7_7_7:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ; def s[4:11]
; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
; GFX9-NEXT: s_mov_b32 s12, s10
; GFX9-NEXT: s_mov_b32 s13, s11
; GFX9-NEXT: ;;#ASMSTART
@@ -10078,473 +8568,397 @@ define void @s_shuffle_v3p0_v4p0__0_1_1() {
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 0, i32 1, i32 1>
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 7, i32 7>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__1_1_1() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__1_1_1:
+define void @s_shuffle_v3p0_v4p0__u_0_0() {
+; GFX9-LABEL: s_shuffle_v3p0_v4p0__u_0_0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ; def s[12:19]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:13]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 1, i32 1, i32 1>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 poison, i32 0, i32 0>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__2_1_1() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__2_1_1:
+define void @s_shuffle_v3p0_v4p0__0_0_0() {
+; GFX9-LABEL: s_shuffle_v3p0_v4p0__0_0_0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; def s[8:15]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s12, s8
+; GFX9-NEXT: s_mov_b32 s13, s9
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:13]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 2, i32 1, i32 1>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> zeroinitializer
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__3_1_1() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__3_1_1:
+define void @s_shuffle_v3p0_v4p0__1_0_0() {
+; GFX9-LABEL: s_shuffle_v3p0_v4p0__1_0_0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ; def s[12:19]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_mov_b32 s8, s14
; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:13]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 3, i32 1, i32 1>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 1, i32 0, i32 0>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__4_1_1() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__4_1_1:
+define void @s_shuffle_v3p0_v4p0__2_0_0() {
+; GFX9-LABEL: s_shuffle_v3p0_v4p0__2_0_0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ; def s[4:11]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: s_mov_b32 s10, s4
+; GFX9-NEXT: s_mov_b32 s11, s5
+; GFX9-NEXT: s_mov_b32 s12, s4
+; GFX9-NEXT: s_mov_b32 s13, s5
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:13]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 4, i32 1, i32 1>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 2, i32 0, i32 0>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__5_1_1() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__5_1_1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s12, s10
-; GFX900-NEXT: s_mov_b32 s13, s11
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__5_1_1:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s12, s10
-; GFX90A-NEXT: s_mov_b32 s13, s11
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__5_1_1:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+define void @s_shuffle_v3p0_v4p0__3_0_0() {
+; GFX9-LABEL: s_shuffle_v3p0_v4p0__3_0_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s18
+; GFX9-NEXT: s_mov_b32 s9, s19
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 5, i32 1, i32 1>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 3, i32 0, i32 0>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__6_1_1() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__6_1_1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s12, s10
-; GFX900-NEXT: s_mov_b32 s13, s11
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__6_1_1:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s12, s10
-; GFX90A-NEXT: s_mov_b32 s13, s11
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__6_1_1:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+define void @s_shuffle_v3p0_v4p0__4_0_0() {
+; GFX9-LABEL: s_shuffle_v3p0_v4p0__4_0_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 6, i32 1, i32 1>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 4, i32 0, i32 0>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_1_1() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_1_1:
+define void @s_shuffle_v3p0_v4p0__5_0_0() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__5_0_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s12, s10
-; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_1_1:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__5_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s12, s10
-; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_1_1:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__5_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b32 s8, s2
+; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b32 s10, s12
+; GFX942-NEXT: s_mov_b32 s11, s13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 1, i32 1>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 5, i32 0, i32 0>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_u_1() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_u_1:
+define void @s_shuffle_v3p0_v4p0__6_0_0() {
+; GFX9-LABEL: s_shuffle_v3p0_v4p0__6_0_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 6, i32 0, i32 0>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__7_0_0() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_0_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_u_1:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_u_1:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s10, s12
+; GFX942-NEXT: s_mov_b32 s11, s13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 poison, i32 1>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 0, i32 0>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_0_1() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_0_1:
+define void @s_shuffle_v3p0_v4p0__7_u_0() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_u_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_0_1:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_u_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_0_1:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_u_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 0, i32 1>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 poison, i32 0>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_2_1() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_2_1:
+define void @s_shuffle_v3p0_v4p0__7_1_0() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_1_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_2_1:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_1_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_2_1:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_1_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s10, s14
+; GFX942-NEXT: s_mov_b32 s11, s15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 2, i32 1>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 1, i32 0>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_3_1() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_3_1:
+define void @s_shuffle_v3p0_v4p0__7_2_0() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_2_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -10553,16 +8967,16 @@ define void @s_shuffle_v3p0_v4p0__7_3_1() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_3_1:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_2_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -10571,63 +8985,84 @@ define void @s_shuffle_v3p0_v4p0__7_3_1() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_3_1:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_2_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s10, s16
+; GFX942-NEXT: s_mov_b32 s11, s17
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 3, i32 1>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 2, i32 0>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_4_1() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_4_1:
+define void @s_shuffle_v3p0_v4p0__7_3_0() {
+; GFX9-LABEL: s_shuffle_v3p0_v4p0__7_3_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s18
+; GFX9-NEXT: s_mov_b32 s9, s19
+; GFX9-NEXT: s_mov_b32 s12, s4
+; GFX9-NEXT: s_mov_b32 s13, s5
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 3, i32 0>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__7_4_0() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_4_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_4_1:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_4_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -10636,45 +9071,41 @@ define void @s_shuffle_v3p0_v4p0__7_4_1() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_4_1:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_4_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b32 s11, s1
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 4, i32 1>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 4, i32 0>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_5_1() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_5_1:
+define void @s_shuffle_v3p0_v4p0__7_5_0() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_5_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -10685,14 +9116,14 @@ define void @s_shuffle_v3p0_v4p0__7_5_1() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s14
; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_5_1:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_5_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -10703,14 +9134,14 @@ define void @s_shuffle_v3p0_v4p0__7_5_1() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s14
; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_5_1:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_5_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -10721,126 +9152,120 @@ define void @s_shuffle_v3p0_v4p0__7_5_1() {
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_mov_b32 s8, s14
; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b32 s13, s1
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 5, i32 1>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 5, i32 0>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_6_1() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_6_1:
+define void @s_shuffle_v3p0_v4p0__7_6_0() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_6_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[16:23]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s10, s20
+; GFX900-NEXT: s_mov_b32 s11, s21
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_6_1:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_6_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[16:23]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s10, s20
+; GFX90A-NEXT: s_mov_b32 s11, s21
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_6_1:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_6_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s10, s4
+; GFX942-NEXT: s_mov_b32 s11, s5
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 6, i32 1>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 6, i32 0>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__u_2_2() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__u_2_2:
+define void @s_shuffle_v3p0_v4p0__u_1_1() {
+; GFX9-LABEL: s_shuffle_v3p0_v4p0__u_1_1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; def s[8:15]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:13]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 poison, i32 2, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 poison, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__0_2_2() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__0_2_2:
+define void @s_shuffle_v3p0_v4p0__0_1_1() {
+; GFX9-LABEL: s_shuffle_v3p0_v4p0__0_1_1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; def s[8:15]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:13]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 0, i32 2, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 0, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__1_2_2() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__1_2_2:
+define void @s_shuffle_v3p0_v4p0__1_1_1() {
+; GFX9-LABEL: s_shuffle_v3p0_v4p0__1_1_1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
@@ -10848,41 +9273,41 @@ define void @s_shuffle_v3p0_v4p0__1_2_2() {
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_mov_b32 s8, s10
; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:13]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 1, i32 2, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 1, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__2_2_2() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__2_2_2:
+define void @s_shuffle_v3p0_v4p0__2_1_1() {
+; GFX9-LABEL: s_shuffle_v3p0_v4p0__2_1_1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ; def s[4:11]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: s_mov_b32 s10, s6
+; GFX9-NEXT: s_mov_b32 s11, s7
+; GFX9-NEXT: s_mov_b32 s12, s6
+; GFX9-NEXT: s_mov_b32 s13, s7
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:13]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 2, i32 2, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 2, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__3_2_2() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__3_2_2:
+define void @s_shuffle_v3p0_v4p0__3_1_1() {
+; GFX9-LABEL: s_shuffle_v3p0_v4p0__3_1_1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
@@ -10890,75 +9315,75 @@ define void @s_shuffle_v3p0_v4p0__3_2_2() {
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_mov_b32 s8, s14
; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:13]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 3, i32 2, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 3, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__4_2_2() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__4_2_2:
+define void @s_shuffle_v3p0_v4p0__4_1_1() {
+; GFX9-LABEL: s_shuffle_v3p0_v4p0__4_1_1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; def s[8:15]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:13]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 4, i32 2, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 4, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__5_2_2() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__5_2_2:
+define void @s_shuffle_v3p0_v4p0__5_1_1() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__5_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__5_2_2:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__5_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__5_2_2:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__5_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -10969,171 +9394,118 @@ define void @s_shuffle_v3p0_v4p0__5_2_2() {
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_mov_b32 s8, s2
; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 5, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__6_2_2() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__6_2_2:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__6_2_2:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__6_2_2:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b32 s12, s10
+; GFX942-NEXT: s_mov_b32 s13, s11
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 6, i32 2, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 5, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_2_2() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_2_2:
+define void @s_shuffle_v3p0_v4p0__6_1_1() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__6_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_2_2:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__6_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_2_2:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__6_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s12, s2
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b32 s10, s2
+; GFX942-NEXT: s_mov_b32 s11, s3
+; GFX942-NEXT: s_mov_b32 s13, s3
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 2, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 6, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_u_2() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_u_2:
+define void @s_shuffle_v3p0_v4p0__7_1_1() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_u_2:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_u_2:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -11144,177 +9516,189 @@ define void @s_shuffle_v3p0_v4p0__7_u_2() {
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_mov_b32 s8, s6
; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s12, s10
+; GFX942-NEXT: s_mov_b32 s13, s11
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 poison, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_0_2() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_0_2:
+define void @s_shuffle_v3p0_v4p0__7_u_1() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_u_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_0_2:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_u_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_0_2:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_u_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s12, s2
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b32 s8, s10
+; GFX942-NEXT: s_mov_b32 s9, s11
+; GFX942-NEXT: s_mov_b32 s13, s3
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 0, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 poison, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_1_2() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_1_2:
+define void @s_shuffle_v3p0_v4p0__7_0_1() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_0_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_1_2:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_0_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_1_2:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_0_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s12, s2
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s8, s10
+; GFX942-NEXT: s_mov_b32 s9, s11
+; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b32 s13, s3
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 1, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 0, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_3_2() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_3_2:
+define void @s_shuffle_v3p0_v4p0__7_2_1() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_2_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_3_2:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_2_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_3_2:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_2_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -11323,120 +9707,153 @@ define void @s_shuffle_v3p0_v4p0__7_3_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
+; GFX942-NEXT: s_mov_b32 s8, s14
+; GFX942-NEXT: s_mov_b32 s9, s15
+; GFX942-NEXT: s_mov_b32 s10, s4
+; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: s_mov_b32 s13, s3
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 3, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 2, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_4_2() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_4_2:
+define void @s_shuffle_v3p0_v4p0__7_3_1() {
+; GFX9-LABEL: s_shuffle_v3p0_v4p0__7_3_1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s18
+; GFX9-NEXT: s_mov_b32 s9, s19
+; GFX9-NEXT: s_mov_b32 s12, s6
+; GFX9-NEXT: s_mov_b32 s13, s7
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 3, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__7_4_1() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_4_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_4_2:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_4_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_4_2:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_4_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s12, s2
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b32 s8, s10
+; GFX942-NEXT: s_mov_b32 s9, s11
+; GFX942-NEXT: s_mov_b32 s10, s4
+; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b32 s13, s3
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 4, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 4, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_5_2() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_5_2:
+define void @s_shuffle_v3p0_v4p0__7_5_1() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_5_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s14
; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_5_2:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_5_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s14
; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_5_2:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_5_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -11447,57 +9864,61 @@ define void @s_shuffle_v3p0_v4p0__7_5_2() {
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_mov_b32 s8, s14
; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: s_mov_b32 s13, s3
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 5, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 5, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_6_2() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_6_2:
+define void @s_shuffle_v3p0_v4p0__7_6_1() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_6_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s10, s20
-; GFX900-NEXT: s_mov_b32 s11, s21
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_6_2:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_6_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s10, s20
-; GFX90A-NEXT: s_mov_b32 s11, s21
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_6_2:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_6_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -11506,357 +9927,418 @@ define void @s_shuffle_v3p0_v4p0__7_6_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b32 s8, s14
+; GFX942-NEXT: s_mov_b32 s9, s15
+; GFX942-NEXT: s_mov_b32 s10, s12
+; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: s_mov_b32 s13, s3
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 6, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 6, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__u_3_3() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__u_3_3:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__u_3_3:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__u_3_3:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+define void @s_shuffle_v3p0_v4p0__u_2_2() {
+; GFX9-LABEL: s_shuffle_v3p0_v4p0__u_2_2:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 poison, i32 3, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 poison, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__0_3_3() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__0_3_3:
+define void @s_shuffle_v3p0_v4p0__0_2_2() {
+; GFX9-LABEL: s_shuffle_v3p0_v4p0__0_2_2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; def s[8:15]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:13]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 0, i32 3, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 0, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__1_3_3() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__1_3_3:
+define void @s_shuffle_v3p0_v4p0__1_2_2() {
+; GFX9-LABEL: s_shuffle_v3p0_v4p0__1_2_2:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 1, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__2_2_2() {
+; GFX9-LABEL: s_shuffle_v3p0_v4p0__2_2_2:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s12, s8
+; GFX9-NEXT: s_mov_b32 s13, s9
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 2, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__3_2_2() {
+; GFX9-LABEL: s_shuffle_v3p0_v4p0__3_2_2:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s14
+; GFX9-NEXT: s_mov_b32 s9, s15
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 3, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__4_2_2() {
+; GFX9-LABEL: s_shuffle_v3p0_v4p0__4_2_2:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 4, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__5_2_2() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__5_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__1_3_3:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__5_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__1_3_3:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__5_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_mov_b32 s8, s2
; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b32 s10, s12
+; GFX942-NEXT: s_mov_b32 s11, s13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 1, i32 3, i32 3>
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 5, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__2_3_3() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__2_3_3:
+define void @s_shuffle_v3p0_v4p0__6_2_2() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__6_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__2_3_3:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__6_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__2_3_3:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__6_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s10, s12
+; GFX942-NEXT: s_mov_b32 s11, s13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 2, i32 3, i32 3>
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 6, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__3_3_3() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__3_3_3:
+define void @s_shuffle_v3p0_v4p0__7_2_2() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__3_3_3:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__3_3_3:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_mov_b32 s8, s6
; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b32 s10, s12
+; GFX942-NEXT: s_mov_b32 s11, s13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 3, i32 3, i32 3>
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__4_3_3() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__4_3_3:
+define void @s_shuffle_v3p0_v4p0__7_u_2() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_u_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__4_3_3:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_u_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__4_3_3:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_u_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 4, i32 3, i32 3>
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 poison, i32 2>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__5_3_3() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__5_3_3:
+define void @s_shuffle_v3p0_v4p0__7_0_2() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_0_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__5_3_3:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_0_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__5_3_3:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_0_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -11865,61 +10347,57 @@ define void @s_shuffle_v3p0_v4p0__5_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b32 s8, s14
+; GFX942-NEXT: s_mov_b32 s9, s15
+; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b32 s12, s4
+; GFX942-NEXT: s_mov_b32 s13, s5
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 5, i32 3, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 0, i32 2>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__6_3_3() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__6_3_3:
+define void @s_shuffle_v3p0_v4p0__7_1_2() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_1_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[16:23]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__6_3_3:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_1_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[16:23]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__6_3_3:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_1_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -11928,25 +10406,21 @@ define void @s_shuffle_v3p0_v4p0__6_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 6, i32 3, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 1, i32 2>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_3_3() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_3_3:
+define void @s_shuffle_v3p0_v4p0__7_3_2() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_3_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -11959,14 +10433,12 @@ define void @s_shuffle_v3p0_v4p0__7_3_3() {
; GFX900-NEXT: s_mov_b32 s9, s11
; GFX900-NEXT: s_mov_b32 s10, s14
; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_3_3:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_3_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -11979,77 +10451,73 @@ define void @s_shuffle_v3p0_v4p0__7_3_3() {
; GFX90A-NEXT: s_mov_b32 s9, s11
; GFX90A-NEXT: s_mov_b32 s10, s14
; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_3_3:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_3_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s10, s14
+; GFX942-NEXT: s_mov_b32 s11, s15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 3, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 3, i32 2>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_u_3() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_u_3:
+define void @s_shuffle_v3p0_v4p0__7_4_2() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_4_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s12, s10
-; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_u_3:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_4_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s12, s10
-; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_u_3:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_4_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -12058,63 +10526,59 @@ define void @s_shuffle_v3p0_v4p0__7_u_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b32 s11, s1
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 poison, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 4, i32 2>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_0_3() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_0_3:
+define void @s_shuffle_v3p0_v4p0__7_5_2() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_5_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s18
-; GFX900-NEXT: s_mov_b32 s13, s19
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_0_3:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_5_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s18
-; GFX90A-NEXT: s_mov_b32 s13, s19
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_0_3:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_5_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -12125,23 +10589,21 @@ define void @s_shuffle_v3p0_v4p0__7_0_3() {
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_mov_b32 s8, s14
; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b32 s12, s4
+; GFX942-NEXT: s_mov_b32 s13, s5
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 0, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 5, i32 2>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_1_3() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_1_3:
+define void @s_shuffle_v3p0_v4p0__7_6_2() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_6_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -12152,14 +10614,14 @@ define void @s_shuffle_v3p0_v4p0__7_1_3() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s22
; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s10, s20
+; GFX900-NEXT: s_mov_b32 s11, s21
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_1_3:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_6_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -12170,14 +10632,14 @@ define void @s_shuffle_v3p0_v4p0__7_1_3() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s22
; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s10, s20
+; GFX90A-NEXT: s_mov_b32 s11, s21
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_1_3:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_6_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -12188,88 +10650,202 @@ define void @s_shuffle_v3p0_v4p0__7_1_3() {
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_mov_b32 s8, s6
; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b32 s10, s4
+; GFX942-NEXT: s_mov_b32 s11, s5
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 1, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 6, i32 2>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_2_3() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_2_3:
+define void @s_shuffle_v3p0_v4p0__u_3_3() {
+; GFX9-LABEL: s_shuffle_v3p0_v4p0__u_3_3:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 poison, i32 3, i32 3>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__0_3_3() {
+; GFX9-LABEL: s_shuffle_v3p0_v4p0__0_3_3:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s14
+; GFX9-NEXT: s_mov_b32 s11, s15
+; GFX9-NEXT: s_mov_b32 s12, s14
+; GFX9-NEXT: s_mov_b32 s13, s15
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 0, i32 3, i32 3>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__1_3_3() {
+; GFX9-LABEL: s_shuffle_v3p0_v4p0__1_3_3:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s6
+; GFX9-NEXT: s_mov_b32 s9, s7
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 1, i32 3, i32 3>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__2_3_3() {
+; GFX9-LABEL: s_shuffle_v3p0_v4p0__2_3_3:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 2, i32 3, i32 3>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__3_3_3() {
+; GFX9-LABEL: s_shuffle_v3p0_v4p0__3_3_3:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 3, i32 3, i32 3>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__4_3_3() {
+; GFX9-LABEL: s_shuffle_v3p0_v4p0__4_3_3:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 4, i32 3, i32 3>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__5_3_3() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__5_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_2_3:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__5_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_2_3:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__5_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s12, s10
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b32 s8, s2
+; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b32 s13, s11
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 2, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 5, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_4_3() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_4_3:
+define void @s_shuffle_v3p0_v4p0__6_3_3() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__6_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -12278,10 +10854,8 @@ define void @s_shuffle_v3p0_v4p0__7_4_3() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
; GFX900-NEXT: s_mov_b32 s12, s14
; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
@@ -12289,7 +10863,7 @@ define void @s_shuffle_v3p0_v4p0__7_4_3() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_4_3:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__6_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -12298,10 +10872,8 @@ define void @s_shuffle_v3p0_v4p0__7_4_3() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
; GFX90A-NEXT: s_mov_b32 s12, s14
; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
@@ -12309,135 +10881,129 @@ define void @s_shuffle_v3p0_v4p0__7_4_3() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_4_3:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__6_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s12, s14
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b32 s10, s14
+; GFX942-NEXT: s_mov_b32 s11, s15
+; GFX942-NEXT: s_mov_b32 s13, s15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 4, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 6, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_5_3() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_5_3:
+define void @s_shuffle_v3p0_v4p0__7_3_3() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s12, s18
-; GFX900-NEXT: s_mov_b32 s13, s19
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_5_3:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s12, s18
-; GFX90A-NEXT: s_mov_b32 s13, s19
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_5_3:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s12, s10
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s13, s11
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 5, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_6_3() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_6_3:
+define void @s_shuffle_v3p0_v4p0__7_u_3() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_u_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s10, s20
-; GFX900-NEXT: s_mov_b32 s11, s21
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_6_3:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_u_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s10, s20
-; GFX90A-NEXT: s_mov_b32 s11, s21
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_6_3:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_u_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -12448,8 +11014,6 @@ define void @s_shuffle_v3p0_v4p0__7_6_3() {
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_mov_b32 s8, s14
; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
; GFX942-NEXT: s_mov_b32 s12, s6
; GFX942-NEXT: s_mov_b32 s13, s7
; GFX942-NEXT: ;;#ASMSTART
@@ -12458,620 +11022,687 @@ define void @s_shuffle_v3p0_v4p0__7_6_3() {
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 6, i32 3>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__u_4_4() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__u_4_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 poison, i32 4, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 poison, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__0_4_4() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__0_4_4:
+define void @s_shuffle_v3p0_v4p0__7_0_3() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_0_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__0_4_4:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__0_4_4:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 0, i32 4, i32 4>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__1_4_4() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__1_4_4:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s18
+; GFX900-NEXT: s_mov_b32 s13, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__1_4_4:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_0_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s18
+; GFX90A-NEXT: s_mov_b32 s13, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__1_4_4:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_0_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b32 s8, s14
+; GFX942-NEXT: s_mov_b32 s9, s15
+; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b32 s12, s6
+; GFX942-NEXT: s_mov_b32 s13, s7
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 1, i32 4, i32 4>
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 0, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__2_4_4() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__2_4_4:
+define void @s_shuffle_v3p0_v4p0__7_1_3() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_1_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__2_4_4:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_1_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[16:23]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__2_4_4:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_1_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s12, s14
+; GFX942-NEXT: s_mov_b32 s13, s15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 2, i32 4, i32 4>
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 1, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__3_4_4() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__3_4_4:
+define void @s_shuffle_v3p0_v4p0__7_2_3() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_2_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__3_4_4:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_2_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__3_4_4:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_2_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s8, s14
+; GFX942-NEXT: s_mov_b32 s9, s15
+; GFX942-NEXT: s_mov_b32 s10, s4
+; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b32 s12, s6
+; GFX942-NEXT: s_mov_b32 s13, s7
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 3, i32 4, i32 4>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__4_4_4() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__4_4_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 4, i32 4, i32 4>
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 2, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__5_4_4() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__5_4_4:
+define void @s_shuffle_v3p0_v4p0__7_4_3() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_4_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
; GFX900-NEXT: s_mov_b32 s10, s4
; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__5_4_4:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_4_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
; GFX90A-NEXT: s_mov_b32 s10, s4
; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__5_4_4:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_4_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b32 s8, s18
+; GFX942-NEXT: s_mov_b32 s9, s19
+; GFX942-NEXT: s_mov_b32 s10, s12
+; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b32 s12, s6
+; GFX942-NEXT: s_mov_b32 s13, s7
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 5, i32 4, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 4, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__6_4_4() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__6_4_4:
+define void @s_shuffle_v3p0_v4p0__7_5_3() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_5_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s18
+; GFX900-NEXT: s_mov_b32 s13, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__6_4_4:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_5_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s18
+; GFX90A-NEXT: s_mov_b32 s13, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__6_4_4:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_5_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b32 s8, s14
+; GFX942-NEXT: s_mov_b32 s9, s15
+; GFX942-NEXT: s_mov_b32 s12, s6
+; GFX942-NEXT: s_mov_b32 s13, s7
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 6, i32 4, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 5, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_4_4() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_4_4:
+define void @s_shuffle_v3p0_v4p0__7_6_3() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_6_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s10, s20
+; GFX900-NEXT: s_mov_b32 s11, s21
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_4_4:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_6_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s10, s20
+; GFX90A-NEXT: s_mov_b32 s11, s21
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_4_4:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_6_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b32 s8, s14
+; GFX942-NEXT: s_mov_b32 s9, s15
+; GFX942-NEXT: s_mov_b32 s10, s12
+; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b32 s12, s6
+; GFX942-NEXT: s_mov_b32 s13, s7
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 4, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 6, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_u_4() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_u_4:
+define void @s_shuffle_v3p0_v4p0__u_4_4() {
+; GFX9-LABEL: s_shuffle_v3p0_v4p0__u_4_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 poison, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__0_4_4() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__0_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_u_4:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__0_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_u_4:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__0_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 poison, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 0, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_0_4() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_0_4:
+define void @s_shuffle_v3p0_v4p0__1_4_4() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__1_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_0_4:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__1_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_0_4:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__1_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b32 s8, s2
+; GFX942-NEXT: s_mov_b32 s9, s3
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 0, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 1, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_1_4() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_1_4:
+define void @s_shuffle_v3p0_v4p0__2_4_4() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__2_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_1_4:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__2_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_1_4:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__2_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 1, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 2, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_2_4() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_2_4:
+define void @s_shuffle_v3p0_v4p0__3_4_4() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__3_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_2_4:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__3_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_2_4:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__3_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:19]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 3, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__4_4_4() {
+; GFX9-LABEL: s_shuffle_v3p0_v4p0__4_4_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 4, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__5_4_4() {
+; GFX9-LABEL: s_shuffle_v3p0_v4p0__5_4_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s14
+; GFX9-NEXT: s_mov_b32 s9, s15
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 2, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 5, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_3_4() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_3_4:
+define void @s_shuffle_v3p0_v4p0__6_4_4() {
+; GFX9-LABEL: s_shuffle_v3p0_v4p0__6_4_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s4
+; GFX9-NEXT: s_mov_b32 s11, s5
+; GFX9-NEXT: s_mov_b32 s12, s4
+; GFX9-NEXT: s_mov_b32 s13, s5
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 6, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__7_4_4() {
+; GFX9-LABEL: s_shuffle_v3p0_v4p0__7_4_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s18
+; GFX9-NEXT: s_mov_b32 s9, s19
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__7_u_4() {
+; GFX9-LABEL: s_shuffle_v3p0_v4p0__7_u_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s18
+; GFX9-NEXT: s_mov_b32 s9, s19
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 poison, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__7_0_4() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_0_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -13082,12 +11713,14 @@ define void @s_shuffle_v3p0_v4p0__7_3_4() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s18
; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_3_4:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_0_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -13098,12 +11731,14 @@ define void @s_shuffle_v3p0_v4p0__7_3_4() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s18
; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_3_4:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_0_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -13114,124 +11749,196 @@ define void @s_shuffle_v3p0_v4p0__7_3_4() {
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_mov_b32 s8, s18
; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b32 s11, s1
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 3, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 0, i32 4>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_5_4() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_5_4:
+define void @s_shuffle_v3p0_v4p0__7_1_4() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_1_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s6
-; GFX900-NEXT: s_mov_b32 s11, s7
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_5_4:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_1_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s6
-; GFX90A-NEXT: s_mov_b32 s11, s7
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_5_4:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_1_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s8, s18
+; GFX942-NEXT: s_mov_b32 s9, s19
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 5, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 1, i32 4>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_6_4() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_6_4:
+define void @s_shuffle_v3p0_v4p0__7_2_4() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_2_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s18
; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
+; GFX900-NEXT: s_mov_b32 s10, s20
+; GFX900-NEXT: s_mov_b32 s11, s21
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_6_4:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_2_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s10, s20
+; GFX90A-NEXT: s_mov_b32 s11, s21
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_6_4:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_2_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s8, s18
+; GFX942-NEXT: s_mov_b32 s9, s19
; GFX942-NEXT: s_mov_b32 s10, s4
; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 2, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__7_3_4() {
+; GFX9-LABEL: s_shuffle_v3p0_v4p0__7_3_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s18
+; GFX9-NEXT: s_mov_b32 s9, s19
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 3, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__7_5_4() {
+; GFX9-LABEL: s_shuffle_v3p0_v4p0__7_5_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s18
+; GFX9-NEXT: s_mov_b32 s9, s19
+; GFX9-NEXT: s_mov_b32 s10, s14
+; GFX9-NEXT: s_mov_b32 s11, s15
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 5, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__7_6_4() {
+; GFX9-LABEL: s_shuffle_v3p0_v4p0__7_6_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s18
+; GFX9-NEXT: s_mov_b32 s9, s19
+; GFX9-NEXT: s_mov_b32 s10, s16
+; GFX9-NEXT: s_mov_b32 s11, s17
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 6, i32 4>
@@ -13386,15 +12093,15 @@ define void @s_shuffle_v3p0_v4p0__2_5_5() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s12, s10
-; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -13404,15 +12111,15 @@ define void @s_shuffle_v3p0_v4p0__2_5_5() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s12, s10
-; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -13422,15 +12129,16 @@ define void @s_shuffle_v3p0_v4p0__2_5_5() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b32 s10, s2
+; GFX942-NEXT: s_mov_b32 s11, s3
+; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: s_mov_b32 s13, s3
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -13550,12 +12258,12 @@ define void @s_shuffle_v3p0_v4p0__6_5_5() {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ; def s[4:11]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: s_mov_b32 s10, s6
+; GFX9-NEXT: s_mov_b32 s11, s7
+; GFX9-NEXT: s_mov_b32 s12, s6
+; GFX9-NEXT: s_mov_b32 s13, s7
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:13]
; GFX9-NEXT: ;;#ASMEND
@@ -13878,17 +12586,16 @@ define void @s_shuffle_v3p0_v4p0__7_3_5() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: s_mov_b32 s13, s3
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -14163,15 +12870,15 @@ define void @s_shuffle_v3p0_v4p0__2_6_6() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -14181,15 +12888,15 @@ define void @s_shuffle_v3p0_v4p0__2_6_6() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -14199,15 +12906,16 @@ define void @s_shuffle_v3p0_v4p0__2_6_6() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b32 s10, s4
+; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b32 s12, s4
+; GFX942-NEXT: s_mov_b32 s13, s5
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -14327,12 +13035,12 @@ define void @s_shuffle_v3p0_v4p0__6_6_6() {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ; def s[4:11]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s12, s8
+; GFX9-NEXT: s_mov_b32 s13, s9
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:13]
; GFX9-NEXT: ;;#ASMEND
@@ -14574,15 +13282,15 @@ define void @s_shuffle_v3p0_v4p0__7_3_6() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s18
-; GFX900-NEXT: s_mov_b32 s11, s19
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -14592,15 +13300,15 @@ define void @s_shuffle_v3p0_v4p0__7_3_6() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s18
-; GFX90A-NEXT: s_mov_b32 s11, s19
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -14610,15 +13318,16 @@ define void @s_shuffle_v3p0_v4p0__7_3_6() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s12, s4
+; GFX942-NEXT: s_mov_b32 s13, s5
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -14671,88 +13380,56 @@ define void @s_shuffle_v3p0_v4p0__7_4_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 4, i32 6>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__7_5_6() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__7_5_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 5, i32 6>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__u_7_7() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__u_7_7:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__u_7_7:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__u_7_7:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b32 s12, s4
+; GFX942-NEXT: s_mov_b32 s13, s5
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 4, i32 6>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__7_5_6() {
+; GFX9-LABEL: s_shuffle_v3p0_v4p0__7_5_6:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s14
+; GFX9-NEXT: s_mov_b32 s9, s15
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 5, i32 6>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__u_7_7() {
+; GFX9-LABEL: s_shuffle_v3p0_v4p0__u_7_7:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 poison, i32 7, i32 7>
@@ -14826,17 +13503,15 @@ define void @s_shuffle_v3p0_v4p0__1_7_7() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -14846,17 +13521,15 @@ define void @s_shuffle_v3p0_v4p0__1_7_7() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -14866,17 +13539,16 @@ define void @s_shuffle_v3p0_v4p0__1_7_7() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_mov_b32 s8, s2
; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b32 s12, s10
+; GFX942-NEXT: s_mov_b32 s13, s11
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -14929,17 +13601,16 @@ define void @s_shuffle_v3p0_v4p0__2_7_7() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b32 s10, s6
+; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b32 s12, s6
+; GFX942-NEXT: s_mov_b32 s13, s7
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -14956,17 +13627,15 @@ define void @s_shuffle_v3p0_v4p0__3_7_7() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s18
-; GFX900-NEXT: s_mov_b32 s11, s19
-; GFX900-NEXT: s_mov_b32 s12, s18
-; GFX900-NEXT: s_mov_b32 s13, s19
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
@@ -14976,17 +13645,15 @@ define void @s_shuffle_v3p0_v4p0__3_7_7() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s18
-; GFX90A-NEXT: s_mov_b32 s11, s19
-; GFX90A-NEXT: s_mov_b32 s12, s18
-; GFX90A-NEXT: s_mov_b32 s13, s19
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
@@ -14998,15 +13665,14 @@ define void @s_shuffle_v3p0_v4p0__3_7_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b32 s8, s14
+; GFX942-NEXT: s_mov_b32 s9, s15
+; GFX942-NEXT: s_mov_b32 s12, s10
+; GFX942-NEXT: s_mov_b32 s13, s11
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -15041,56 +13707,20 @@ define void @s_shuffle_v3p0_v4p0__4_7_7() {
}
define void @s_shuffle_v3p0_v4p0__5_7_7() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__5_7_7:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__5_7_7:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__5_7_7:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v3p0_v4p0__5_7_7:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s6
+; GFX9-NEXT: s_mov_b32 s9, s7
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 5, i32 7, i32 7>
@@ -15099,56 +13729,18 @@ define void @s_shuffle_v3p0_v4p0__5_7_7() {
}
define void @s_shuffle_v3p0_v4p0__6_7_7() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__6_7_7:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__6_7_7:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__6_7_7:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v3p0_v4p0__6_7_7:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 6, i32 7, i32 7>
@@ -15444,17 +14036,16 @@ define void @s_shuffle_v3p0_v4p0__7_3_7() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s12, s6
+; GFX942-NEXT: s_mov_b32 s13, s7
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3p3.v2p3.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3p3.v2p3.ll
index 1434189e6bda1..db1cf9faff8ec 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3p3.v2p3.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3p3.v2p3.ll
@@ -776,15 +776,14 @@ define void @v_shuffle_v3p3_v2p3__3_u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v2p3__3_u_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v2, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[3:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v2, v[1:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3p3.v3p3.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3p3.v3p3.ll
index 0c5fe591656bb..cbcb0b64e2ef8 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3p3.v3p3.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3p3.v3p3.ll
@@ -99,36 +99,33 @@ define void @v_shuffle_v3p3_v3p3__2_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v3p3__2_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v3, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p3_v3p3__2_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v3, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v3p3__2_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: global_store_dwordx3 v3, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -194,36 +191,33 @@ define void @v_shuffle_v3p3_v3p3__5_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v3p3__5_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v3, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p3_v3p3__5_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v3, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: global_store_dwordx3 v3, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -237,48 +231,45 @@ define void @v_shuffle_v3p3_v3p3__5_0_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v3p3__5_0_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: global_store_dwordx3 v5, v[0:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p3_v3p3__5_0_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v0
-; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_0_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v0
-; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: global_store_dwordx3 v7, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -292,46 +283,43 @@ define void @v_shuffle_v3p3_v3p3__5_1_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v3p3__5_1_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
-; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: global_store_dwordx3 v5, v[0:2], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p3_v3p3__5_1_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
-; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: global_store_dwordx3 v5, v[0:2], s[16:17]
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_1_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
-; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -345,15 +333,14 @@ define void @v_shuffle_v3p3_v3p3__5_2_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v3p3__5_2_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[1:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx3 v6, v[1:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v4, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -361,15 +348,14 @@ define void @v_shuffle_v3p3_v3p3__5_2_u(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -377,15 +363,14 @@ define void @v_shuffle_v3p3_v3p3__5_2_u(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -401,37 +386,35 @@ define void @v_shuffle_v3p3_v3p3__5_3_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: global_store_dwordx3 v4, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p3_v3p3__5_3_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: global_store_dwordx3 v3, v[4:6], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: global_store_dwordx3 v4, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_3_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: global_store_dwordx3 v4, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -445,36 +428,37 @@ define void @v_shuffle_v3p3_v3p3__5_4_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v3p3__5_4_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: global_store_dwordx3 v4, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p3_v3p3__5_4_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: global_store_dwordx3 v4, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_4_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: global_store_dwordx3 v4, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -488,38 +472,37 @@ define void @v_shuffle_v3p3_v3p3__5_5_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v3p3__5_5_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: global_store_dwordx3 v3, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: global_store_dwordx3 v4, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p3_v3p3__5_5_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx3 v4, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_5_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx3 v4, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -533,50 +516,46 @@ define void @v_shuffle_v3p3_v3p3__5_5_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v3p3__5_5_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx3 v6, v[1:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p3_v3p3__5_5_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_5_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx3 v7, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -591,14 +570,14 @@ define void @v_shuffle_v3p3_v3p3__5_5_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: global_store_dwordx3 v5, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -606,16 +585,15 @@ define void @v_shuffle_v3p3_v3p3__5_5_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: v_mov_b32_e32 v4, v1
-; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -623,17 +601,15 @@ define void @v_shuffle_v3p3_v3p3__5_5_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: v_mov_b32_e32 v4, v1
-; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx3 v7, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -648,14 +624,14 @@ define void @v_shuffle_v3p3_v3p3__5_5_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -663,15 +639,14 @@ define void @v_shuffle_v3p3_v3p3__5_5_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v6
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -679,16 +654,15 @@ define void @v_shuffle_v3p3_v3p3__5_5_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v6
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -702,42 +676,40 @@ define void @v_shuffle_v3p3_v3p3__5_5_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v3p3__5_5_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p3_v3p3__5_5_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v2
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: global_store_dwordx3 v3, v[4:6], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_5_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v2
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -751,42 +723,40 @@ define void @v_shuffle_v3p3_v3p3__5_5_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v3p3__5_5_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p3_v3p3__5_5_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v2
-; GFX90A-NEXT: v_mov_b32_e32 v6, v1
-; GFX90A-NEXT: global_store_dwordx3 v3, v[4:6], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_5_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v2
-; GFX942-NEXT: v_mov_b32_e32 v6, v1
-; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -800,39 +770,40 @@ define void @v_shuffle_v3p3_v3p3__5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v3p3__5_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p3_v3p3__5_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -858,26 +829,25 @@ define void @v_shuffle_v3p3_v3p3__u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-LABEL: v_shuffle_v3p3_v3p3__u_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v1, v2
+; GFX90A-NEXT: global_store_dwordx3 v0, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v3p3__u_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v1, v2
+; GFX942-NEXT: global_store_dwordx3 v0, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -948,29 +918,27 @@ define void @v_shuffle_v3p3_v3p3__1_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-LABEL: v_shuffle_v3p3_v3p3__1_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v1
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: global_store_dwordx3 v3, v[4:6], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v3
+; GFX90A-NEXT: v_mov_b32_e32 v1, v2
+; GFX90A-NEXT: global_store_dwordx3 v5, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v3p3__1_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v1
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v3
+; GFX942-NEXT: v_mov_b32_e32 v1, v2
+; GFX942-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -983,42 +951,40 @@ define void @v_shuffle_v3p3_v3p3__2_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v3p3__2_0_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p3_v3p3__2_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: global_store_dwordx3 v3, v[4:6], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v3p3__2_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1043,26 +1009,25 @@ define void @v_shuffle_v3p3_v3p3__3_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-LABEL: v_shuffle_v3p3_v3p3__3_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v1, v2
+; GFX90A-NEXT: global_store_dwordx3 v0, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v3p3__3_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v1, v2
+; GFX942-NEXT: global_store_dwordx3 v0, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1091,16 +1056,15 @@ define void @v_shuffle_v3p3_v3p3__4_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: v_mov_b32_e32 v3, v0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: v_mov_b32_e32 v1, v2
+; GFX90A-NEXT: global_store_dwordx3 v7, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1108,17 +1072,15 @@ define void @v_shuffle_v3p3_v3p3__4_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: v_mov_b32_e32 v3, v0
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v1, v2
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: global_store_dwordx3 v7, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1133,50 +1095,45 @@ define void @v_shuffle_v3p3_v3p3__5_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: global_store_dwordx3 v5, v[0:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p3_v3p3__5_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v0
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: global_store_dwordx3 v7, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1190,49 +1147,42 @@ define void @v_shuffle_v3p3_v3p3__5_u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v3p3__5_u_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v3, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p3_v3p3__5_u_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v3, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_u_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
+; GFX942-NEXT: global_store_dwordx3 v3, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1246,52 +1196,46 @@ define void @v_shuffle_v3p3_v3p3__5_1_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v3p3__5_1_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v3, v5
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p3_v3p3__5_1_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v3, v5
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_1_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: global_store_dwordx3 v7, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1305,50 +1249,46 @@ define void @v_shuffle_v3p3_v3p3__5_2_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v3p3__5_2_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v3, v6
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx3 v6, v[1:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p3_v3p3__5_2_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v6
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v2
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: global_store_dwordx3 v3, v[4:6], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_2_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v2
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v6
+; GFX942-NEXT: global_store_dwordx3 v7, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1362,50 +1302,46 @@ define void @v_shuffle_v3p3_v3p3__5_3_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v3p3__5_3_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: global_store_dwordx3 v7, v[0:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p3_v3p3__5_3_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v2
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: global_store_dwordx3 v5, v[6:8], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_3_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v2
-; GFX942-NEXT: global_store_dwordx3 v5, v[6:8], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: global_store_dwordx3 v7, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1419,49 +1355,46 @@ define void @v_shuffle_v3p3_v3p3__5_4_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v3p3__5_4_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p3_v3p3__5_4_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v3, v1
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_4_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: global_store_dwordx3 v7, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1604,39 +1537,40 @@ define void @v_shuffle_v3p3_v3p3__2_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v3p3__2_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p3_v3p3__2_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v3p3__2_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1747,15 +1681,14 @@ define void @v_shuffle_v3p3_v3p3__5_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: global_store_dwordx3 v5, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1763,15 +1696,14 @@ define void @v_shuffle_v3p3_v3p3__5_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: global_store_dwordx3 v5, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1779,16 +1711,14 @@ define void @v_shuffle_v3p3_v3p3__5_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1]
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1802,15 +1732,14 @@ define void @v_shuffle_v3p3_v3p3__5_u_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v3p3__5_u_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
-; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1818,15 +1747,14 @@ define void @v_shuffle_v3p3_v3p3__5_u_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: global_store_dwordx3 v5, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1834,16 +1762,14 @@ define void @v_shuffle_v3p3_v3p3__5_u_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1]
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1857,15 +1783,14 @@ define void @v_shuffle_v3p3_v3p3__5_0_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v3p3__5_0_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
-; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1873,16 +1798,15 @@ define void @v_shuffle_v3p3_v3p3__5_0_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v1
-; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1890,17 +1814,15 @@ define void @v_shuffle_v3p3_v3p3__5_0_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v0
-; GFX942-NEXT: v_mov_b32_e32 v4, v1
-; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
+; GFX942-NEXT: global_store_dwordx3 v7, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1915,15 +1837,14 @@ define void @v_shuffle_v3p3_v3p3__5_2_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v5
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:6]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx3 v7, v[0:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1933,14 +1854,13 @@ define void @v_shuffle_v3p3_v3p3__5_2_1(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v6
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v2
-; GFX90A-NEXT: v_mov_b32_e32 v6, v1
-; GFX90A-NEXT: global_store_dwordx3 v3, v[4:6], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1950,14 +1870,13 @@ define void @v_shuffle_v3p3_v3p3__5_2_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v6
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v2
-; GFX942-NEXT: v_mov_b32_e32 v6, v1
-; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1]
+; GFX942-NEXT: global_store_dwordx3 v7, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1974,13 +1893,12 @@ define void @v_shuffle_v3p3_v3p3__5_3_1(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1988,16 +1906,15 @@ define void @v_shuffle_v3p3_v3p3__5_3_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v2
-; GFX90A-NEXT: v_mov_b32_e32 v8, v1
-; GFX90A-NEXT: global_store_dwordx3 v5, v[6:8], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2005,16 +1922,15 @@ define void @v_shuffle_v3p3_v3p3__5_3_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, v1
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v2
-; GFX942-NEXT: global_store_dwordx3 v5, v[6:8], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: global_store_dwordx3 v7, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -2029,15 +1945,14 @@ define void @v_shuffle_v3p3_v3p3__5_4_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v4, v1
-; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2045,15 +1960,15 @@ define void @v_shuffle_v3p3_v3p3__5_4_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v4, v1
-; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2061,16 +1976,15 @@ define void @v_shuffle_v3p3_v3p3__5_4_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v4, v1
-; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
+; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: global_store_dwordx3 v7, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -2212,39 +2126,40 @@ define void @v_shuffle_v3p3_v3p3__2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v3p3__2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p3_v3p3__2_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v3p3__2_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -2353,16 +2268,15 @@ define void @v_shuffle_v3p3_v3p3__5_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v3p3__5_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[1:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx3 v6, v[1:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2370,15 +2284,14 @@ define void @v_shuffle_v3p3_v3p3__5_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2386,15 +2299,14 @@ define void @v_shuffle_v3p3_v3p3__5_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -2408,46 +2320,43 @@ define void @v_shuffle_v3p3_v3p3__5_u_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v3p3__5_u_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p3_v3p3__5_u_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_u_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -2462,15 +2371,14 @@ define void @v_shuffle_v3p3_v3p3__5_0_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v5
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:6]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx3 v7, v[0:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2480,14 +2388,13 @@ define void @v_shuffle_v3p3_v3p3__5_0_2(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_mov_b32_e32 v4, v6
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: global_store_dwordx3 v3, v[4:6], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2497,14 +2404,13 @@ define void @v_shuffle_v3p3_v3p3__5_0_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_mov_b32_e32 v4, v6
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1]
+; GFX942-NEXT: global_store_dwordx3 v7, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -2518,46 +2424,43 @@ define void @v_shuffle_v3p3_v3p3__5_1_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v3p3__5_1_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p3_v3p3__5_1_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_1_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -2574,13 +2477,12 @@ define void @v_shuffle_v3p3_v3p3__5_3_2(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: global_store_dwordx3 v5, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2588,15 +2490,14 @@ define void @v_shuffle_v3p3_v3p3__5_3_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v4
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2604,16 +2505,15 @@ define void @v_shuffle_v3p3_v3p3__5_3_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v4
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -2628,15 +2528,14 @@ define void @v_shuffle_v3p3_v3p3__5_4_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v5, v2
-; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2644,15 +2543,14 @@ define void @v_shuffle_v3p3_v3p3__5_4_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: global_store_dwordx3 v3, v[4:6], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2660,15 +2558,15 @@ define void @v_shuffle_v3p3_v3p3__5_4_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -2773,36 +2671,33 @@ define void @v_shuffle_v3p3_v3p3__2_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v3p3__2_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v3, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p3_v3p3__2_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v3, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v3p3__2_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: global_store_dwordx3 v3, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -2839,29 +2734,27 @@ define void @v_shuffle_v3p3_v3p3__4_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-LABEL: v_shuffle_v3p3_v3p3__4_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v1
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: global_store_dwordx3 v3, v[4:6], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v3
+; GFX90A-NEXT: v_mov_b32_e32 v1, v2
+; GFX90A-NEXT: global_store_dwordx3 v5, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v3p3__4_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v1
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v3
+; GFX942-NEXT: v_mov_b32_e32 v1, v2
+; GFX942-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -2875,42 +2768,40 @@ define void @v_shuffle_v3p3_v3p3__5_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v3p3__5_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p3_v3p3__5_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: global_store_dwordx3 v3, v[4:6], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -2924,13 +2815,12 @@ define void @v_shuffle_v3p3_v3p3__5_u_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v3p3__5_u_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx3 v3, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2970,14 +2860,14 @@ define void @v_shuffle_v3p3_v3p3__5_0_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: global_store_dwordx3 v5, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2985,16 +2875,15 @@ define void @v_shuffle_v3p3_v3p3__5_0_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v2
-; GFX90A-NEXT: global_store_dwordx3 v5, v[6:8], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3002,16 +2891,15 @@ define void @v_shuffle_v3p3_v3p3__5_0_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v8, v2
-; GFX942-NEXT: global_store_dwordx3 v5, v[6:8], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: global_store_dwordx3 v7, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -3026,14 +2914,14 @@ define void @v_shuffle_v3p3_v3p3__5_1_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: global_store_dwordx3 v5, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3041,14 +2929,14 @@ define void @v_shuffle_v3p3_v3p3__5_1_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: global_store_dwordx3 v5, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3056,15 +2944,15 @@ define void @v_shuffle_v3p3_v3p3__5_1_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -3079,14 +2967,14 @@ define void @v_shuffle_v3p3_v3p3__5_2_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[1:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx3 v6, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3094,16 +2982,15 @@ define void @v_shuffle_v3p3_v3p3__5_2_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3111,16 +2998,15 @@ define void @v_shuffle_v3p3_v3p3__5_2_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -3147,29 +3033,27 @@ define void @v_shuffle_v3p3_v3p3__5_4_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-LABEL: v_shuffle_v3p3_v3p3__5_4_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: global_store_dwordx3 v3, v[4:6], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_4_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -3340,12 +3224,11 @@ define void @v_shuffle_v3p3_v3p3__2_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: v_mov_b32_e32 v5, v4
-; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3355,13 +3238,13 @@ define void @v_shuffle_v3p3_v3p3__2_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v5
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v6, v5
-; GFX90A-NEXT: global_store_dwordx3 v3, v[4:6], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3371,13 +3254,13 @@ define void @v_shuffle_v3p3_v3p3__2_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1]
+; GFX942-NEXT: global_store_dwordx3 v7, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -3480,39 +3363,40 @@ define void @v_shuffle_v3p3_v3p3__5_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v3p3__5_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p3_v3p3__5_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -3526,38 +3410,37 @@ define void @v_shuffle_v3p3_v3p3__5_u_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v3p3__5_u_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx3 v3, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p3_v3p3__5_u_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: global_store_dwordx3 v3, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_u_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: global_store_dwordx3 v3, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -3572,15 +3455,14 @@ define void @v_shuffle_v3p3_v3p3__5_0_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx3 v5, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3588,16 +3470,15 @@ define void @v_shuffle_v3p3_v3p3__5_0_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v3
-; GFX90A-NEXT: global_store_dwordx3 v5, v[6:8], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3605,16 +3486,15 @@ define void @v_shuffle_v3p3_v3p3__5_0_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v8, v3
-; GFX942-NEXT: global_store_dwordx3 v5, v[6:8], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: global_store_dwordx3 v7, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -3629,15 +3509,14 @@ define void @v_shuffle_v3p3_v3p3__5_1_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx3 v5, v[0:2], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3645,15 +3524,14 @@ define void @v_shuffle_v3p3_v3p3__5_1_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx3 v5, v[0:2], s[16:17]
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3661,16 +3539,15 @@ define void @v_shuffle_v3p3_v3p3__5_1_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -3685,15 +3562,14 @@ define void @v_shuffle_v3p3_v3p3__5_2_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[1:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx3 v6, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3701,16 +3577,15 @@ define void @v_shuffle_v3p3_v3p3__5_2_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3718,16 +3593,15 @@ define void @v_shuffle_v3p3_v3p3__5_2_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -3741,41 +3615,40 @@ define void @v_shuffle_v3p3_v3p3__5_3_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v3p3__5_3_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p3_v3p3__5_3_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v1
-; GFX90A-NEXT: global_store_dwordx3 v3, v[4:6], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_3_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v1
-; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -3945,15 +3818,15 @@ define void @v_shuffle_v3p3_v3p3__2_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v5
+; GFX900-NEXT: v_mov_b32_e32 v4, v5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3963,13 +3836,13 @@ define void @v_shuffle_v3p3_v3p3__2_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v6
+; GFX90A-NEXT: v_mov_b32_e32 v4, v6
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v6
-; GFX90A-NEXT: global_store_dwordx3 v3, v[4:6], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3979,13 +3852,13 @@ define void @v_shuffle_v3p3_v3p3__2_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v6
+; GFX942-NEXT: v_mov_b32_e32 v4, v6
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, v6
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1]
+; GFX942-NEXT: global_store_dwordx3 v7, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -4087,36 +3960,37 @@ define void @v_shuffle_v3p3_v3p3__5_u_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v3p3__5_u_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx3 v3, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p3_v3p3__5_u_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: global_store_dwordx3 v3, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_u_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: global_store_dwordx3 v3, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -4131,15 +4005,14 @@ define void @v_shuffle_v3p3_v3p3__5_0_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: global_store_dwordx3 v5, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4147,15 +4020,15 @@ define void @v_shuffle_v3p3_v3p3__5_0_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v0
-; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4163,16 +4036,15 @@ define void @v_shuffle_v3p3_v3p3__5_0_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v0
-; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: global_store_dwordx3 v7, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -4187,15 +4059,14 @@ define void @v_shuffle_v3p3_v3p3__5_1_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: global_store_dwordx3 v5, v[0:2], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4203,15 +4074,14 @@ define void @v_shuffle_v3p3_v3p3__5_1_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: global_store_dwordx3 v5, v[0:2], s[16:17]
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4219,16 +4089,15 @@ define void @v_shuffle_v3p3_v3p3__5_1_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: global_store_dwordx3 v5, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -4243,15 +4112,14 @@ define void @v_shuffle_v3p3_v3p3__5_2_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[1:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx3 v6, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4259,15 +4127,15 @@ define void @v_shuffle_v3p3_v3p3__5_2_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v2
-; GFX90A-NEXT: global_store_dwordx3 v3, v[4:6], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4275,15 +4143,15 @@ define void @v_shuffle_v3p3_v3p3__5_2_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v2
-; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -4297,42 +4165,40 @@ define void @v_shuffle_v3p3_v3p3__5_3_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v3p3__5_3_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p3_v3p3__5_3_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: global_store_dwordx3 v3, v[4:6], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_3_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: global_store_dwordx3 v3, v[4:6], s[0:1]
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -4346,36 +4212,40 @@ define void @v_shuffle_v3p3_v3p3__5_4_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v3p3__5_4_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p3_v3p3__5_4_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx3 v3, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v3p3__5_4_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3p3.v4p3.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3p3.v4p3.ll
index c9f194d873e35..6127b40404f5e 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3p3.v4p3.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3p3.v4p3.ll
@@ -99,36 +99,33 @@ define void @v_shuffle_v3p3_v4p3__2_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v4p3__2_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v4, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p3_v4p3__2_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v4, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v4p3__2_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: global_store_dwordx3 v4, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -141,12 +138,11 @@ define void @v_shuffle_v3p3_v4p3__3_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v4p3__3_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v4, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -236,36 +232,33 @@ define void @v_shuffle_v3p3_v4p3__6_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v4p3__6_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v4, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p3_v4p3__6_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v4, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v4p3__6_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: global_store_dwordx3 v4, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -279,12 +272,11 @@ define void @v_shuffle_v3p3_v4p3__7_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v4p3__7_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v4, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -322,16 +314,14 @@ define void @v_shuffle_v3p3_v4p3__7_0_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v4p3__7_0_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: global_store_dwordx3 v5, v[1:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v8, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -378,15 +368,14 @@ define void @v_shuffle_v3p3_v4p3__7_1_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v4p3__7_1_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v7, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -431,15 +420,14 @@ define void @v_shuffle_v3p3_v4p3__7_2_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v4p3__7_2_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: global_store_dwordx3 v7, v[1:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -485,16 +473,14 @@ define void @v_shuffle_v3p3_v4p3__7_3_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v4p3__7_3_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v5, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -508,9 +494,8 @@ define void @v_shuffle_v3p3_v4p3__7_3_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v7
+; GFX90A-NEXT: global_store_dwordx3 v8, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -524,9 +509,9 @@ define void @v_shuffle_v3p3_v4p3__7_3_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v7
+; GFX942-NEXT: global_store_dwordx3 v8, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -540,13 +525,12 @@ define void @v_shuffle_v3p3_v4p3__7_4_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v4p3__7_4_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx3 v5, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -586,12 +570,12 @@ define void @v_shuffle_v3p3_v4p3__7_5_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v4p3__7_5_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx3 v5, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -629,12 +613,12 @@ define void @v_shuffle_v3p3_v4p3__7_6_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v4p3__7_6_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx3 v5, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -674,13 +658,12 @@ define void @v_shuffle_v3p3_v4p3__7_7_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v4p3__7_7_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx3 v5, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -691,9 +674,8 @@ define void @v_shuffle_v3p3_v4p3__7_7_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v3
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx3 v4, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -704,9 +686,8 @@ define void @v_shuffle_v3p3_v4p3__7_7_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v3
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx3 v4, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -720,17 +701,15 @@ define void @v_shuffle_v3p3_v4p3__7_7_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v4p3__7_7_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx3 v5, v[1:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v9, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -740,14 +719,12 @@ define void @v_shuffle_v3p3_v4p3__7_7_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v8, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -757,15 +734,12 @@ define void @v_shuffle_v3p3_v4p3__7_7_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1]
+; GFX942-NEXT: global_store_dwordx3 v8, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -780,16 +754,14 @@ define void @v_shuffle_v3p3_v4p3__7_7_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v1
-; GFX900-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx3 v8, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -797,16 +769,15 @@ define void @v_shuffle_v3p3_v4p3__7_7_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v1
-; GFX90A-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: global_store_dwordx3 v8, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -814,17 +785,15 @@ define void @v_shuffle_v3p3_v4p3__7_7_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v1
-; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
+; GFX942-NEXT: global_store_dwordx3 v8, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -839,15 +808,14 @@ define void @v_shuffle_v3p3_v4p3__7_7_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: global_store_dwordx3 v7, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx3 v7, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -855,15 +823,14 @@ define void @v_shuffle_v3p3_v4p3__7_7_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -871,16 +838,15 @@ define void @v_shuffle_v3p3_v4p3__7_7_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -895,15 +861,14 @@ define void @v_shuffle_v3p3_v4p3__7_7_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v7
-; GFX900-NEXT: global_store_dwordx3 v8, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -911,16 +876,15 @@ define void @v_shuffle_v3p3_v4p3__7_7_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -928,16 +892,15 @@ define void @v_shuffle_v3p3_v4p3__7_7_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1]
+; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -951,43 +914,39 @@ define void @v_shuffle_v3p3_v4p3__7_7_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v4p3__7_7_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p3_v4p3__7_7_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v3
-; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: global_store_dwordx3 v4, v[6:8], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_7_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v3
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: global_store_dwordx3 v4, v[6:8], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1001,42 +960,39 @@ define void @v_shuffle_v3p3_v4p3__7_7_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v4p3__7_7_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v4, v1
-; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p3_v4p3__7_7_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v3
-; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: v_mov_b32_e32 v8, v1
-; GFX90A-NEXT: global_store_dwordx3 v4, v[6:8], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_7_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v3
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: v_mov_b32_e32 v8, v1
-; GFX942-NEXT: global_store_dwordx3 v4, v[6:8], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1050,13 +1006,13 @@ define void @v_shuffle_v3p3_v4p3__7_7_6(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v4p3__7_7_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1096,13 +1052,13 @@ define void @v_shuffle_v3p3_v4p3__7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v4p3__7_7_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1112,11 +1068,10 @@ define void @v_shuffle_v3p3_v4p3__7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v3
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1126,11 +1081,10 @@ define void @v_shuffle_v3p3_v4p3__7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v3
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1145,38 +1099,36 @@ define void @v_shuffle_v3p3_v4p3__u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: global_store_dwordx3 v0, v[0:2], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p3_v4p3__u_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v1, v2
+; GFX90A-NEXT: global_store_dwordx3 v0, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v4p3__u_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v1, v2
+; GFX942-NEXT: global_store_dwordx3 v0, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1247,29 +1199,27 @@ define void @v_shuffle_v3p3_v4p3__1_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-LABEL: v_shuffle_v3p3_v4p3__1_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v1
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: global_store_dwordx3 v4, v[6:8], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v3
+; GFX90A-NEXT: v_mov_b32_e32 v1, v2
+; GFX90A-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v4p3__1_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v1
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: global_store_dwordx3 v4, v[6:8], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v3
+; GFX942-NEXT: v_mov_b32_e32 v1, v2
+; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1285,40 +1235,36 @@ define void @v_shuffle_v3p3_v4p3__2_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p3_v4p3__2_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: global_store_dwordx3 v4, v[6:8], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v4p3__2_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: global_store_dwordx3 v4, v[6:8], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1331,43 +1277,40 @@ define void @v_shuffle_v3p3_v4p3__3_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v4p3__3_0_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p3_v4p3__3_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v3
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: global_store_dwordx3 v4, v[6:8], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: v_mov_b32_e32 v1, v2
+; GFX90A-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v4p3__3_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v3
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: global_store_dwordx3 v4, v[6:8], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: v_mov_b32_e32 v1, v2
+; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1381,38 +1324,36 @@ define void @v_shuffle_v3p3_v4p3__4_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: global_store_dwordx3 v0, v[0:2], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p3_v4p3__4_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v1, v2
+; GFX90A-NEXT: global_store_dwordx3 v0, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v4p3__4_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v1, v2
+; GFX942-NEXT: global_store_dwordx3 v0, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1442,16 +1383,15 @@ define void @v_shuffle_v3p3_v4p3__5_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: v_mov_b32_e32 v3, v0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: v_mov_b32_e32 v1, v2
+; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1459,17 +1399,15 @@ define void @v_shuffle_v3p3_v4p3__5_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: v_mov_b32_e32 v3, v0
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v1, v2
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1486,14 +1424,12 @@ define void @v_shuffle_v3p3_v4p3__6_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx3 v5, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
+; GFX900-NEXT: global_store_dwordx3 v8, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1503,14 +1439,12 @@ define void @v_shuffle_v3p3_v4p3__6_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: global_store_dwordx3 v8, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1520,15 +1454,13 @@ define void @v_shuffle_v3p3_v4p3__6_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v0
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: global_store_dwordx3 v8, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1543,16 +1475,14 @@ define void @v_shuffle_v3p3_v4p3__7_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx3 v5, v[1:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v8, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1560,16 +1490,15 @@ define void @v_shuffle_v3p3_v4p3__7_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v3, v0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v7
+; GFX90A-NEXT: v_mov_b32_e32 v1, v2
+; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1577,17 +1506,15 @@ define void @v_shuffle_v3p3_v4p3__7_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v3, v0
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v1, v2
+; GFX942-NEXT: v_mov_b32_e32 v0, v7
+; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1601,16 +1528,14 @@ define void @v_shuffle_v3p3_v4p3__7_u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v4p3__7_u_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx3 v5, v[1:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v4, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1618,15 +1543,14 @@ define void @v_shuffle_v3p3_v4p3__7_u_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v1, 0
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v0, v7
+; GFX90A-NEXT: global_store_dwordx3 v1, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1634,16 +1558,15 @@ define void @v_shuffle_v3p3_v4p3__7_u_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v1, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v7
+; GFX942-NEXT: global_store_dwordx3 v1, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1657,17 +1580,15 @@ define void @v_shuffle_v3p3_v4p3__7_1_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v4p3__7_1_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v6
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v9, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1675,16 +1596,15 @@ define void @v_shuffle_v3p3_v4p3__7_1_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v7
+; GFX90A-NEXT: v_mov_b32_e32 v1, v3
+; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1692,17 +1612,15 @@ define void @v_shuffle_v3p3_v4p3__7_1_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: v_mov_b32_e32 v0, v7
+; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1716,50 +1634,48 @@ define void @v_shuffle_v3p3_v4p3__7_2_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v4p3__7_2_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v7
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx3 v7, v[1:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v9, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p3_v4p3__7_2_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[6:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v7
-; GFX90A-NEXT: v_mov_b32_e32 v5, v2
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: global_store_dwordx3 v8, v[4:6], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v9
+; GFX90A-NEXT: v_mov_b32_e32 v1, v4
+; GFX90A-NEXT: global_store_dwordx3 v10, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_2_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[6:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, v7
-; GFX942-NEXT: v_mov_b32_e32 v5, v2
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v9
+; GFX942-NEXT: v_mov_b32_e32 v1, v4
+; GFX942-NEXT: global_store_dwordx3 v10, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1773,17 +1689,15 @@ define void @v_shuffle_v3p3_v4p3__7_3_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v4p3__7_3_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v8
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx3 v8, v[1:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v9, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1829,17 +1743,15 @@ define void @v_shuffle_v3p3_v4p3__7_4_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v4p3__7_4_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v9, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1847,16 +1759,15 @@ define void @v_shuffle_v3p3_v4p3__7_4_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, v5
-; GFX90A-NEXT: v_mov_b32_e32 v9, v2
-; GFX90A-NEXT: v_mov_b32_e32 v10, v0
-; GFX90A-NEXT: global_store_dwordx3 v6, v[8:10], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v7
+; GFX90A-NEXT: v_mov_b32_e32 v1, v4
+; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1864,16 +1775,16 @@ define void @v_shuffle_v3p3_v4p3__7_4_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, v0
-; GFX942-NEXT: v_mov_b32_e32 v8, v5
-; GFX942-NEXT: v_mov_b32_e32 v9, v2
-; GFX942-NEXT: global_store_dwordx3 v6, v[8:10], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v7
+; GFX942-NEXT: v_mov_b32_e32 v1, v4
+; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1887,16 +1798,15 @@ define void @v_shuffle_v3p3_v4p3__7_5_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v4p3__7_5_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx3 v5, v[1:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v9, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1943,16 +1853,15 @@ define void @v_shuffle_v3p3_v4p3__7_6_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v4p3__7_6_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v9, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1960,16 +1869,15 @@ define void @v_shuffle_v3p3_v4p3__7_6_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v7
+; GFX90A-NEXT: v_mov_b32_e32 v1, v6
+; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1977,17 +1885,16 @@ define void @v_shuffle_v3p3_v4p3__7_6_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v7
+; GFX942-NEXT: v_mov_b32_e32 v1, v6
+; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -2133,10 +2040,10 @@ define void @v_shuffle_v3p3_v4p3__2_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2146,10 +2053,10 @@ define void @v_shuffle_v3p3_v4p3__2_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2159,10 +2066,10 @@ define void @v_shuffle_v3p3_v4p3__2_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -2175,13 +2082,13 @@ define void @v_shuffle_v3p3_v4p3__3_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v4p3__3_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2319,15 +2226,14 @@ define void @v_shuffle_v3p3_v4p3__6_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
+; GFX900-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2335,15 +2241,15 @@ define void @v_shuffle_v3p3_v4p3__6_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v5
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: global_store_dwordx3 v8, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2351,16 +2257,15 @@ define void @v_shuffle_v3p3_v4p3__6_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
+; GFX942-NEXT: global_store_dwordx3 v8, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -2375,15 +2280,14 @@ define void @v_shuffle_v3p3_v4p3__7_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v7, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2430,16 +2334,14 @@ define void @v_shuffle_v3p3_v4p3__7_u_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v4p3__7_u_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v8, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2486,17 +2388,14 @@ define void @v_shuffle_v3p3_v4p3__7_0_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v4p3__7_0_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: v_mov_b32_e32 v4, v1
-; GFX900-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v8, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2546,16 +2445,14 @@ define void @v_shuffle_v3p3_v4p3__7_2_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v6
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v6
-; GFX900-NEXT: v_mov_b32_e32 v4, v2
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx3 v7, v[3:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v8, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2604,15 +2501,14 @@ define void @v_shuffle_v3p3_v4p3__7_3_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v7
-; GFX900-NEXT: v_mov_b32_e32 v4, v1
-; GFX900-NEXT: global_store_dwordx3 v8, v[2:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v7
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx3 v8, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2659,16 +2555,14 @@ define void @v_shuffle_v3p3_v4p3__7_4_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v2
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx3 v8, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2717,15 +2611,14 @@ define void @v_shuffle_v3p3_v4p3__7_5_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
; GFX900-NEXT: v_mov_b32_e32 v4, v1
-; GFX900-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v8, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2773,15 +2666,14 @@ define void @v_shuffle_v3p3_v4p3__7_6_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx3 v8, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2961,10 +2853,10 @@ define void @v_shuffle_v3p3_v4p3__2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2974,10 +2866,10 @@ define void @v_shuffle_v3p3_v4p3__2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2987,10 +2879,10 @@ define void @v_shuffle_v3p3_v4p3__2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -3003,13 +2895,13 @@ define void @v_shuffle_v3p3_v4p3__3_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v4p3__3_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3146,15 +3038,14 @@ define void @v_shuffle_v3p3_v4p3__6_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx3 v7, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
+; GFX900-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3162,15 +3053,14 @@ define void @v_shuffle_v3p3_v4p3__6_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3178,15 +3068,15 @@ define void @v_shuffle_v3p3_v4p3__6_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -3201,15 +3091,14 @@ define void @v_shuffle_v3p3_v4p3__7_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx3 v7, v[1:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3255,15 +3144,14 @@ define void @v_shuffle_v3p3_v4p3__7_u_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v4p3__7_u_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
-; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: global_store_dwordx3 v7, v[0:2], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx3 v7, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3309,16 +3197,14 @@ define void @v_shuffle_v3p3_v4p3__7_0_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v5, v6
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v6
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v2
-; GFX900-NEXT: global_store_dwordx3 v7, v[3:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v8, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3366,15 +3252,14 @@ define void @v_shuffle_v3p3_v4p3__7_1_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v4p3__7_1_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
-; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: global_store_dwordx3 v7, v[0:2], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx3 v7, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3420,15 +3305,14 @@ define void @v_shuffle_v3p3_v4p3__7_3_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v6
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v7, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3475,15 +3359,14 @@ define void @v_shuffle_v3p3_v4p3__7_4_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx3 v7, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx3 v7, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3531,14 +3414,13 @@ define void @v_shuffle_v3p3_v4p3__7_5_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
; GFX900-NEXT: global_store_dwordx3 v7, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
@@ -3586,15 +3468,14 @@ define void @v_shuffle_v3p3_v4p3__7_6_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v6, v2
-; GFX900-NEXT: global_store_dwordx3 v7, v[4:6], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx3 v7, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3641,38 +3522,37 @@ define void @v_shuffle_v3p3_v4p3__u_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v4p3__u_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p3_v4p3__u_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v4p3__u_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -3745,11 +3625,10 @@ define void @v_shuffle_v3p3_v4p3__1_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3759,11 +3638,10 @@ define void @v_shuffle_v3p3_v4p3__1_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -3776,41 +3654,37 @@ define void @v_shuffle_v3p3_v4p3__2_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v4p3__2_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p3_v4p3__2_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v4p3__2_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -3823,13 +3697,13 @@ define void @v_shuffle_v3p3_v4p3__3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v4p3__3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3839,11 +3713,10 @@ define void @v_shuffle_v3p3_v4p3__3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v3
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3853,11 +3726,10 @@ define void @v_shuffle_v3p3_v4p3__3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v3
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -3870,38 +3742,37 @@ define void @v_shuffle_v3p3_v4p3__4_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v4p3__4_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p3_v4p3__4_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v4p3__4_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -3933,14 +3804,13 @@ define void @v_shuffle_v3p3_v4p3__5_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v5
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v5
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: global_store_dwordx3 v8, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3950,14 +3820,13 @@ define void @v_shuffle_v3p3_v4p3__5_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, v5
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v5
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: global_store_dwordx3 v8, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -3972,15 +3841,14 @@ define void @v_shuffle_v3p3_v4p3__6_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx3 v8, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3988,16 +3856,15 @@ define void @v_shuffle_v3p3_v4p3__6_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v5
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4005,16 +3872,15 @@ define void @v_shuffle_v3p3_v4p3__6_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
+; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -4028,16 +3894,15 @@ define void @v_shuffle_v3p3_v4p3__7_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v4p3__7_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx3 v8, v[1:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4047,14 +3912,13 @@ define void @v_shuffle_v3p3_v4p3__7_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v7
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: global_store_dwordx3 v8, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4064,14 +3928,13 @@ define void @v_shuffle_v3p3_v4p3__7_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v7
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: global_store_dwordx3 v8, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -4085,15 +3948,14 @@ define void @v_shuffle_v3p3_v4p3__7_u_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v4p3__7_u_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx3 v8, v[1:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4140,15 +4002,14 @@ define void @v_shuffle_v3p3_v4p3__7_0_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v5, v7
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: global_store_dwordx3 v8, v[1:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v8, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4197,15 +4058,14 @@ define void @v_shuffle_v3p3_v4p3__7_1_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v5, v6
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v7, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4251,15 +4111,14 @@ define void @v_shuffle_v3p3_v4p3__7_2_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v4p3__7_2_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx3 v8, v[1:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4308,15 +4167,14 @@ define void @v_shuffle_v3p3_v4p3__7_4_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: global_store_dwordx3 v8, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4365,15 +4223,14 @@ define void @v_shuffle_v3p3_v4p3__7_5_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v7
-; GFX900-NEXT: v_mov_b32_e32 v6, v3
-; GFX900-NEXT: global_store_dwordx3 v8, v[4:6], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4420,15 +4277,14 @@ define void @v_shuffle_v3p3_v4p3__7_6_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: v_mov_b32_e32 v7, v3
-; GFX900-NEXT: global_store_dwordx3 v8, v[5:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4567,36 +4423,33 @@ define void @v_shuffle_v3p3_v4p3__2_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v4p3__2_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v4, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p3_v4p3__2_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: global_store_dwordx3 v4, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v4p3__2_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: global_store_dwordx3 v4, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -4609,12 +4462,11 @@ define void @v_shuffle_v3p3_v4p3__3_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v4p3__3_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v4, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4675,29 +4527,27 @@ define void @v_shuffle_v3p3_v4p3__5_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-LABEL: v_shuffle_v3p3_v4p3__5_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v1
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: global_store_dwordx3 v4, v[6:8], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v3
+; GFX90A-NEXT: v_mov_b32_e32 v1, v2
+; GFX90A-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v4p3__5_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v1
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: global_store_dwordx3 v4, v[6:8], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v3
+; GFX942-NEXT: v_mov_b32_e32 v1, v2
+; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -4714,40 +4564,36 @@ define void @v_shuffle_v3p3_v4p3__6_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p3_v4p3__6_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: global_store_dwordx3 v4, v[6:8], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v4p3__6_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: global_store_dwordx3 v4, v[6:8], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -4761,43 +4607,40 @@ define void @v_shuffle_v3p3_v4p3__7_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v4p3__7_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p3_v4p3__7_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v3
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: global_store_dwordx3 v4, v[6:8], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: v_mov_b32_e32 v1, v2
+; GFX90A-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v3
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: global_store_dwordx3 v4, v[6:8], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: v_mov_b32_e32 v1, v2
+; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -4811,40 +4654,37 @@ define void @v_shuffle_v3p3_v4p3__7_u_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v4p3__7_u_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx3 v4, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p3_v4p3__7_u_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v1, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v3
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: global_store_dwordx3 v4, v[6:8], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: global_store_dwordx3 v1, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_u_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v1, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v3
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: global_store_dwordx3 v4, v[6:8], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: global_store_dwordx3 v1, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -4859,50 +4699,47 @@ define void @v_shuffle_v3p3_v4p3__7_0_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: v_mov_b32_e32 v4, v1
-; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx3 v8, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p3_v4p3__7_0_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[6:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, v5
-; GFX90A-NEXT: v_mov_b32_e32 v9, v0
-; GFX90A-NEXT: v_mov_b32_e32 v10, v2
-; GFX90A-NEXT: global_store_dwordx3 v6, v[8:10], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: v_mov_b32_e32 v1, v6
+; GFX90A-NEXT: global_store_dwordx3 v10, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_0_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[6:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v9, v0
-; GFX942-NEXT: v_mov_b32_e32 v8, v5
-; GFX942-NEXT: v_mov_b32_e32 v10, v2
-; GFX942-NEXT: global_store_dwordx3 v6, v[8:10], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: v_mov_b32_e32 v1, v6
+; GFX942-NEXT: global_store_dwordx3 v10, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -4917,14 +4754,14 @@ define void @v_shuffle_v3p3_v4p3__7_1_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx3 v7, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4970,14 +4807,14 @@ define void @v_shuffle_v3p3_v4p3__7_2_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: global_store_dwordx3 v7, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4985,15 +4822,14 @@ define void @v_shuffle_v3p3_v4p3__7_2_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: v_mov_b32_e32 v1, v6
; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
@@ -5002,15 +4838,14 @@ define void @v_shuffle_v3p3_v4p3__7_2_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
+; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v1, v6
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -5026,16 +4861,14 @@ define void @v_shuffle_v3p3_v4p3__7_3_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5049,10 +4882,8 @@ define void @v_shuffle_v3p3_v4p3__7_3_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v7
+; GFX90A-NEXT: global_store_dwordx3 v8, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5066,10 +4897,9 @@ define void @v_shuffle_v3p3_v4p3__7_3_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v7
+; GFX942-NEXT: global_store_dwordx3 v8, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -5083,43 +4913,40 @@ define void @v_shuffle_v3p3_v4p3__7_5_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v4p3__7_5_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p3_v4p3__7_5_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v3
-; GFX90A-NEXT: v_mov_b32_e32 v7, v1
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: global_store_dwordx3 v4, v[6:8], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: v_mov_b32_e32 v1, v3
+; GFX90A-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_5_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v3
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: global_store_dwordx3 v4, v[6:8], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -5133,42 +4960,40 @@ define void @v_shuffle_v3p3_v4p3__7_6_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v4p3__7_6_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p3_v4p3__7_6_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v3
-; GFX90A-NEXT: v_mov_b32_e32 v7, v2
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: global_store_dwordx3 v4, v[6:8], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: v_mov_b32_e32 v1, v4
+; GFX90A-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v4p3__7_6_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v3
-; GFX942-NEXT: v_mov_b32_e32 v7, v2
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: global_store_dwordx3 v4, v[6:8], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: v_mov_b32_e32 v1, v4
+; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -5342,9 +5167,8 @@ define void @v_shuffle_v3p3_v4p3__2_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: v_mov_b32_e32 v5, v4
-; GFX900-NEXT: global_store_dwordx3 v7, v[3:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
+; GFX900-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5352,15 +5176,15 @@ define void @v_shuffle_v3p3_v4p3__2_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v6, v5
-; GFX90A-NEXT: global_store_dwordx3 v8, v[4:6], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v5
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: global_store_dwordx3 v8, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5368,15 +5192,15 @@ define void @v_shuffle_v3p3_v4p3__2_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
+; GFX942-NEXT: global_store_dwordx3 v8, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -5394,12 +5218,11 @@ define void @v_shuffle_v3p3_v4p3__3_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v3
-; GFX900-NEXT: v_mov_b32_e32 v6, v5
-; GFX900-NEXT: global_store_dwordx3 v8, v[4:6], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v8, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5537,10 +5360,10 @@ define void @v_shuffle_v3p3_v4p3__6_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5550,10 +5373,10 @@ define void @v_shuffle_v3p3_v4p3__6_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5563,10 +5386,10 @@ define void @v_shuffle_v3p3_v4p3__6_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -5580,13 +5403,13 @@ define void @v_shuffle_v3p3_v4p3__7_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v4p3__7_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5626,13 +5449,12 @@ define void @v_shuffle_v3p3_v4p3__7_u_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v4p3__7_u_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx3 v4, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5673,16 +5495,14 @@ define void @v_shuffle_v3p3_v4p3__7_0_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v2
-; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx3 v8, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5731,15 +5551,14 @@ define void @v_shuffle_v3p3_v4p3__7_1_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx3 v7, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5787,15 +5606,14 @@ define void @v_shuffle_v3p3_v4p3__7_2_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx3 v7, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5844,16 +5662,14 @@ define void @v_shuffle_v3p3_v4p3__7_3_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
-; GFX900-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5863,14 +5679,13 @@ define void @v_shuffle_v3p3_v4p3__7_3_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v7
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: global_store_dwordx3 v8, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5880,14 +5695,13 @@ define void @v_shuffle_v3p3_v4p3__7_3_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v7
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
+; GFX942-NEXT: global_store_dwordx3 v8, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -5901,14 +5715,13 @@ define void @v_shuffle_v3p3_v4p3__7_4_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v4p3__7_4_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: v_mov_b32_e32 v4, v1
-; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6162,9 +5975,9 @@ define void @v_shuffle_v3p3_v4p3__2_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v2
-; GFX900-NEXT: v_mov_b32_e32 v6, v5
-; GFX900-NEXT: global_store_dwordx3 v7, v[4:6], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v5
+; GFX900-NEXT: v_mov_b32_e32 v4, v5
+; GFX900-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6172,15 +5985,15 @@ define void @v_shuffle_v3p3_v4p3__2_6_6(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v6
-; GFX90A-NEXT: global_store_dwordx3 v8, v[4:6], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v6
+; GFX90A-NEXT: v_mov_b32_e32 v4, v6
+; GFX90A-NEXT: global_store_dwordx3 v8, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6188,15 +6001,15 @@ define void @v_shuffle_v3p3_v4p3__2_6_6(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, v6
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v6
+; GFX942-NEXT: v_mov_b32_e32 v4, v6
+; GFX942-NEXT: global_store_dwordx3 v8, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -6214,12 +6027,12 @@ define void @v_shuffle_v3p3_v4p3__3_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v6
+; GFX900-NEXT: v_mov_b32_e32 v5, v6
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: v_mov_b32_e32 v7, v6
-; GFX900-NEXT: global_store_dwordx3 v8, v[5:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v8, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6356,10 +6169,10 @@ define void @v_shuffle_v3p3_v4p3__6_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6369,10 +6182,10 @@ define void @v_shuffle_v3p3_v4p3__6_6_6(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6382,10 +6195,10 @@ define void @v_shuffle_v3p3_v4p3__6_6_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -6399,13 +6212,13 @@ define void @v_shuffle_v3p3_v4p3__7_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v4p3__7_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6445,12 +6258,12 @@ define void @v_shuffle_v3p3_v4p3__7_u_6(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v4p3__7_u_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx3 v4, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6489,15 +6302,14 @@ define void @v_shuffle_v3p3_v4p3__7_0_6(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: global_store_dwordx3 v5, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx3 v8, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6545,15 +6357,14 @@ define void @v_shuffle_v3p3_v4p3__7_1_6(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx3 v7, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6601,15 +6412,14 @@ define void @v_shuffle_v3p3_v4p3__7_2_6(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx3 v7, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6656,15 +6466,14 @@ define void @v_shuffle_v3p3_v4p3__7_3_6(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v7
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: global_store_dwordx3 v8, v[4:6], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6672,15 +6481,15 @@ define void @v_shuffle_v3p3_v4p3__7_3_6(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v7
-; GFX90A-NEXT: v_mov_b32_e32 v5, v3
-; GFX90A-NEXT: global_store_dwordx3 v8, v[4:6], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v7
+; GFX90A-NEXT: v_mov_b32_e32 v4, v6
+; GFX90A-NEXT: global_store_dwordx3 v8, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6688,15 +6497,15 @@ define void @v_shuffle_v3p3_v4p3__7_3_6(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, v7
-; GFX942-NEXT: v_mov_b32_e32 v5, v3
-; GFX942-NEXT: global_store_dwordx3 v8, v[4:6], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v7
+; GFX942-NEXT: v_mov_b32_e32 v4, v6
+; GFX942-NEXT: global_store_dwordx3 v8, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -6759,12 +6568,13 @@ define void @v_shuffle_v3p3_v4p3__7_5_6(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v4p3__7_5_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6802,38 +6612,37 @@ define void @v_shuffle_v3p3_v4p3__u_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v4p3__u_7_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p3_v4p3__u_7_7:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v4p3__u_7_7:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -6920,16 +6729,15 @@ define void @v_shuffle_v3p3_v4p3__1_7_7(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: global_store_dwordx3 v8, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6937,16 +6745,15 @@ define void @v_shuffle_v3p3_v4p3__1_7_7(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: global_store_dwordx3 v6, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: global_store_dwordx3 v8, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -6967,9 +6774,9 @@ define void @v_shuffle_v3p3_v4p3__2_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v2
-; GFX900-NEXT: v_mov_b32_e32 v5, v6
-; GFX900-NEXT: global_store_dwordx3 v7, v[4:6], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v6
+; GFX900-NEXT: v_mov_b32_e32 v4, v6
+; GFX900-NEXT: global_store_dwordx3 v7, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6979,14 +6786,13 @@ define void @v_shuffle_v3p3_v4p3__2_7_7(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: v_mov_b32_e32 v2, v7
-; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v7
+; GFX90A-NEXT: v_mov_b32_e32 v4, v7
+; GFX90A-NEXT: global_store_dwordx3 v8, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6996,14 +6802,13 @@ define void @v_shuffle_v3p3_v4p3__2_7_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: v_mov_b32_e32 v2, v7
-; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v7
+; GFX942-NEXT: v_mov_b32_e32 v4, v7
+; GFX942-NEXT: global_store_dwordx3 v8, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -7021,12 +6826,12 @@ define void @v_shuffle_v3p3_v4p3__3_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v7
+; GFX900-NEXT: v_mov_b32_e32 v5, v7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: v_mov_b32_e32 v6, v7
-; GFX900-NEXT: global_store_dwordx3 v8, v[5:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx3 v8, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7034,16 +6839,15 @@ define void @v_shuffle_v3p3_v4p3__3_7_7(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v3
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: v_mov_b32_e32 v2, v7
-; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v5
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: global_store_dwordx3 v6, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7051,16 +6855,16 @@ define void @v_shuffle_v3p3_v4p3__3_7_7(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v0, v3
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: v_mov_b32_e32 v2, v7
-; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v5
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: global_store_dwordx3 v6, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -7135,11 +6939,10 @@ define void @v_shuffle_v3p3_v4p3__5_7_7(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7149,11 +6952,10 @@ define void @v_shuffle_v3p3_v4p3__5_7_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -7167,41 +6969,37 @@ define void @v_shuffle_v3p3_v4p3__6_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v4p3__6_7_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v3p3_v4p3__6_7_7:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v3p3_v4p3__6_7_7:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx3 v4, v[0:2], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: global_store_dwordx3 v5, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -7215,12 +7013,12 @@ define void @v_shuffle_v3p3_v4p3__7_u_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v4p3__7_u_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx3 v4, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7261,15 +7059,14 @@ define void @v_shuffle_v3p3_v4p3__7_0_7(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx3 v5, v[2:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx3 v8, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7319,15 +7116,14 @@ define void @v_shuffle_v3p3_v4p3__7_1_7(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
-; GFX900-NEXT: global_store_dwordx3 v6, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx3 v7, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7375,15 +7171,14 @@ define void @v_shuffle_v3p3_v4p3__7_2_7(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v6
-; GFX900-NEXT: global_store_dwordx3 v7, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7432,15 +7227,14 @@ define void @v_shuffle_v3p3_v4p3__7_3_7(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: v_mov_b32_e32 v6, v3
-; GFX900-NEXT: global_store_dwordx3 v8, v[5:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7450,14 +7244,13 @@ define void @v_shuffle_v3p3_v4p3__7_3_7(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v7
-; GFX90A-NEXT: global_store_dwordx3 v8, v[0:2], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v7
+; GFX90A-NEXT: global_store_dwordx3 v8, v[2:4], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7467,14 +7260,13 @@ define void @v_shuffle_v3p3_v4p3__7_3_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v7
-; GFX942-NEXT: global_store_dwordx3 v8, v[0:2], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v7
+; GFX942-NEXT: global_store_dwordx3 v8, v[2:4], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -7488,13 +7280,13 @@ define void @v_shuffle_v3p3_v4p3__7_4_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v4p3__7_4_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7537,13 +7329,13 @@ define void @v_shuffle_v3p3_v4p3__7_5_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v4p3__7_5_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx3 v4, v[0:2], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7583,12 +7375,13 @@ define void @v_shuffle_v3p3_v4p3__7_6_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v3p3_v4p3__7_6_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx3 v4, v[1:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx3 v6, v[3:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4f32.v2f32.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4f32.v2f32.ll
index c7092f04a23ed..2a0344fce9f44 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4f32.v2f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4f32.v2f32.ll
@@ -58,12 +58,11 @@ define void @v_shuffle_v4f32_v2f32__1_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v2f32__1_u_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v2, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx4 v2, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v2, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -111,12 +110,11 @@ define void @v_shuffle_v4f32_v2f32__3_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v2f32__3_u_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v2, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx4 v2, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v2, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -154,15 +152,14 @@ define void @v_shuffle_v4f32_v2f32__3_0_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v2f32__3_0_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:2]
-; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[2:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:1]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx4 v4, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -207,15 +204,14 @@ define void @v_shuffle_v4f32_v2f32__3_1_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v2f32__3_1_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:1]
+; GFX900-NEXT: ; def v[1:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v3, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -262,10 +258,10 @@ define void @v_shuffle_v4f32_v2f32__3_2_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:2]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: global_store_dwordx4 v3, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -306,12 +302,12 @@ define void @v_shuffle_v4f32_v2f32__3_3_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v2f32__3_3_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx4 v2, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: global_store_dwordx4 v3, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -349,15 +345,15 @@ define void @v_shuffle_v4f32_v2f32__3_3_0_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v2f32__3_3_0_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[3:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -402,14 +398,14 @@ define void @v_shuffle_v4f32_v2f32__3_3_1_u(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:2]
+; GFX900-NEXT: ; def v[2:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: global_store_dwordx4 v4, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -457,11 +453,11 @@ define void @v_shuffle_v4f32_v2f32__3_3_2_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: global_store_dwordx4 v4, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -502,13 +498,13 @@ define void @v_shuffle_v4f32_v2f32__3_3_3_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v2f32__3_3_3_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: global_store_dwordx4 v4, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -548,16 +544,16 @@ define void @v_shuffle_v4f32_v2f32__3_3_3_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v2f32__3_3_3_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:4]
+; GFX900-NEXT: ; def v[4:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -604,15 +600,15 @@ define void @v_shuffle_v4f32_v2f32__3_3_3_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[3:4]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -660,12 +656,12 @@ define void @v_shuffle_v4f32_v2f32__3_3_3_2(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:4]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -705,14 +701,14 @@ define void @v_shuffle_v4f32_v2f32__3_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v2f32__3_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
; GFX900-NEXT: v_mov_b32_e32 v2, v1
; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -847,14 +843,14 @@ define void @v_shuffle_v4f32_v2f32__1_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v2f32__1_0_0_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:2]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -939,16 +935,15 @@ define void @v_shuffle_v4f32_v2f32__3_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:2]
+; GFX900-NEXT: ; def v[2:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -995,15 +990,14 @@ define void @v_shuffle_v4f32_v2f32__3_u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[3:4]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v2, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v2, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1050,16 +1044,15 @@ define void @v_shuffle_v4f32_v2f32__3_1_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[3:4]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v4
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1108,15 +1101,15 @@ define void @v_shuffle_v4f32_v2f32__3_2_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:2]
+; GFX900-NEXT: ; def v[3:4]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:4]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1163,15 +1156,15 @@ define void @v_shuffle_v4f32_v2f32__3_3_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[3:4]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1217,16 +1210,15 @@ define void @v_shuffle_v4f32_v2f32__3_3_u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v2f32__3_3_u_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:1]
+; GFX900-NEXT: ; def v[4:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v3, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1272,16 +1264,16 @@ define void @v_shuffle_v4f32_v2f32__3_3_1_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v2f32__3_3_1_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:1]
+; GFX900-NEXT: ; def v[4:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:4]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: v_mov_b32_e32 v3, v5
+; GFX900-NEXT: global_store_dwordx4 v6, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1327,17 +1319,16 @@ define void @v_shuffle_v4f32_v2f32__3_3_2_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v2f32__3_3_2_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[4:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1477,14 +1468,14 @@ define void @v_shuffle_v4f32_v2f32__1_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v2f32__1_1_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
; GFX900-NEXT: v_mov_b32_e32 v2, v1
; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1571,17 +1562,16 @@ define void @v_shuffle_v4f32_v2f32__3_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v2f32__3_1_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:1]
+; GFX900-NEXT: ; def v[1:2]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1629,16 +1619,15 @@ define void @v_shuffle_v4f32_v2f32__3_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v2f32__3_u_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:2]
+; GFX900-NEXT: ; def v[2:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1684,16 +1673,15 @@ define void @v_shuffle_v4f32_v2f32__3_0_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v2f32__3_0_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:4]
+; GFX900-NEXT: ; def v[2:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:2]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1742,13 +1730,13 @@ define void @v_shuffle_v4f32_v2f32__3_2_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[2:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:2]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1795,15 +1783,15 @@ define void @v_shuffle_v4f32_v2f32__3_3_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:2]
+; GFX900-NEXT: ; def v[2:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1849,15 +1837,15 @@ define void @v_shuffle_v4f32_v2f32__3_3_u_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v2f32__3_3_u_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[3:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1901,15 +1889,15 @@ define void @v_shuffle_v4f32_v2f32__3_3_0_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v2f32__3_3_0_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[3:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1956,14 +1944,13 @@ define void @v_shuffle_v4f32_v2f32__3_3_2_1(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:4]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2061,12 +2048,11 @@ define void @v_shuffle_v4f32_v2f32__1_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v2f32__1_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v2, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx4 v2, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v2, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2114,14 +2100,14 @@ define void @v_shuffle_v4f32_v2f32__3_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v2f32__3_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:2]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2161,13 +2147,13 @@ define void @v_shuffle_v4f32_v2f32__3_u_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v2f32__3_u_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v2, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v1, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v1, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx4 v2, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2208,15 +2194,15 @@ define void @v_shuffle_v4f32_v2f32__3_0_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:2]
+; GFX900-NEXT: ; def v[2:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2263,15 +2249,15 @@ define void @v_shuffle_v4f32_v2f32__3_1_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:1]
+; GFX900-NEXT: ; def v[1:2]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2317,14 +2303,14 @@ define void @v_shuffle_v4f32_v2f32__3_3_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v2f32__3_3_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2366,14 +2352,13 @@ define void @v_shuffle_v4f32_v2f32__3_3_u_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v2f32__3_3_u_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx4 v3, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2417,15 +2402,15 @@ define void @v_shuffle_v4f32_v2f32__3_3_0_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[3:4]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:4]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2475,15 +2460,15 @@ define void @v_shuffle_v4f32_v2f32__3_3_1_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:2]
+; GFX900-NEXT: ; def v[2:3]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:4]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2632,16 +2617,15 @@ define void @v_shuffle_v4f32_v2f32__1_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:2]
+; GFX900-NEXT: ; def v[2:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v3
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2736,13 +2720,13 @@ define void @v_shuffle_v4f32_v2f32__3_u_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v2f32__3_u_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:2]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx4 v2, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2783,15 +2767,15 @@ define void @v_shuffle_v4f32_v2f32__3_0_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:2]
+; GFX900-NEXT: ; def v[2:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2838,15 +2822,15 @@ define void @v_shuffle_v4f32_v2f32__3_1_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:1]
+; GFX900-NEXT: ; def v[1:2]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2892,13 +2876,14 @@ define void @v_shuffle_v4f32_v2f32__3_2_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v2f32__3_2_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:2]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2938,13 +2923,13 @@ define void @v_shuffle_v4f32_v2f32__3_3_u_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v2f32__3_3_u_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v2, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx4 v3, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2985,15 +2970,15 @@ define void @v_shuffle_v4f32_v2f32__3_3_0_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[3:4]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3040,15 +3025,15 @@ define void @v_shuffle_v4f32_v2f32__3_3_1_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:2]
+; GFX900-NEXT: ; def v[2:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3096,13 +3081,14 @@ define void @v_shuffle_v4f32_v2f32__3_3_2_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v2f32__3_3_2_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4f32.v3f32.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4f32.v3f32.ll
index 86211d4e3c3d8..504f4aad7e682 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4f32.v3f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4f32.v3f32.ll
@@ -58,12 +58,11 @@ define void @v_shuffle_v4f32_v3f32__1_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v3f32__1_u_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v3, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -100,36 +99,33 @@ define void @v_shuffle_v4f32_v3f32__2_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v3f32__2_u_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v3, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v3f32__2_u_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v3, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v3f32__2_u_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v3, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -153,12 +149,11 @@ define void @v_shuffle_v4f32_v3f32__4_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v3f32__4_u_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v3, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -196,36 +191,33 @@ define void @v_shuffle_v4f32_v3f32__5_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v3f32__5_u_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v3, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v3f32__5_u_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v3, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_u_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v3, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -239,48 +231,45 @@ define void @v_shuffle_v4f32_v3f32__5_0_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v3f32__5_0_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v3f32__5_0_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v0
-; GFX90A-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_0_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v0
-; GFX942-NEXT: global_store_dwordx4 v5, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -294,46 +283,43 @@ define void @v_shuffle_v4f32_v3f32__5_1_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v3f32__5_1_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
-; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v3f32__5_1_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
-; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_1_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
-; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx4 v5, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -347,16 +333,14 @@ define void @v_shuffle_v4f32_v3f32__5_2_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v3f32__5_2_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[1:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -364,15 +348,14 @@ define void @v_shuffle_v4f32_v3f32__5_2_u_u(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -380,15 +363,14 @@ define void @v_shuffle_v4f32_v3f32__5_2_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v5, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -404,37 +386,35 @@ define void @v_shuffle_v4f32_v3f32__5_3_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v3f32__5_3_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_3_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -448,36 +428,37 @@ define void @v_shuffle_v4f32_v3f32__5_4_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v3f32__5_4_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v3f32__5_4_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_4_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -491,39 +472,37 @@ define void @v_shuffle_v4f32_v3f32__5_5_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v3f32__5_5_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v3f32__5_5_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -537,51 +516,46 @@ define void @v_shuffle_v4f32_v3f32__5_5_0_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v3f32__5_5_0_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v3f32__5_5_0_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_0_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: global_store_dwordx4 v5, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -596,15 +570,14 @@ define void @v_shuffle_v4f32_v3f32__5_5_1_u(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -612,16 +585,15 @@ define void @v_shuffle_v4f32_v3f32__5_5_1_u(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: v_mov_b32_e32 v4, v1
-; GFX90A-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -629,17 +601,15 @@ define void @v_shuffle_v4f32_v3f32__5_5_1_u(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: v_mov_b32_e32 v4, v1
-; GFX942-NEXT: global_store_dwordx4 v5, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -654,15 +624,14 @@ define void @v_shuffle_v4f32_v3f32__5_5_2_u(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -670,15 +639,14 @@ define void @v_shuffle_v4f32_v3f32__5_5_2_u(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v6
-; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -686,16 +654,15 @@ define void @v_shuffle_v4f32_v3f32__5_5_2_u(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v6
-; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v5, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -711,40 +678,38 @@ define void @v_shuffle_v4f32_v3f32__5_5_3_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v3f32__5_5_3_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v2
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_3_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v2
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: global_store_dwordx4 v5, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -758,42 +723,40 @@ define void @v_shuffle_v4f32_v3f32__5_5_4_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v3f32__5_5_4_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v3f32__5_5_4_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v2
-; GFX90A-NEXT: v_mov_b32_e32 v6, v1
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_4_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v2
-; GFX942-NEXT: v_mov_b32_e32 v6, v1
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: global_store_dwordx4 v5, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -807,39 +770,40 @@ define void @v_shuffle_v4f32_v3f32__5_5_5_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v3f32__5_5_5_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v3f32__5_5_5_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_5_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: global_store_dwordx4 v5, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -853,50 +817,51 @@ define void @v_shuffle_v4f32_v3f32__5_5_5_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v3f32__5_5_5_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[5:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v3f32__5_5_5_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v9, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[6:8]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v6
+; GFX90A-NEXT: global_store_dwordx4 v9, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_5_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v9, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[6:8]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v6
+; GFX942-NEXT: global_store_dwordx4 v9, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -911,15 +876,15 @@ define void @v_shuffle_v4f32_v3f32__5_5_5_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -927,15 +892,15 @@ define void @v_shuffle_v4f32_v3f32__5_5_5_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -943,16 +908,16 @@ define void @v_shuffle_v4f32_v3f32__5_5_5_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -967,15 +932,15 @@ define void @v_shuffle_v4f32_v3f32__5_5_5_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -983,16 +948,16 @@ define void @v_shuffle_v4f32_v3f32__5_5_5_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v6
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1000,16 +965,16 @@ define void @v_shuffle_v4f32_v3f32__5_5_5_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, v6
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -1025,43 +990,41 @@ define void @v_shuffle_v4f32_v3f32__5_5_5_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v3f32__5_5_5_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v2
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v5, v0
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_5_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v2
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, v0
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -1075,45 +1038,43 @@ define void @v_shuffle_v4f32_v3f32__5_5_5_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v3f32__5_5_5_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v3f32__5_5_5_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v2
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v1
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_5_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v2
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -1127,42 +1088,43 @@ define void @v_shuffle_v4f32_v3f32__5_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v3f32__5_5_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v3f32__5_5_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -1189,29 +1151,26 @@ define void @v_shuffle_v4f32_v3f32__u_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-LABEL: v_shuffle_v4f32_v3f32__u_0_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
+; GFX90A-NEXT: v_mov_b32_e32 v1, v2
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v3f32__u_0_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-NEXT: v_mov_b32_e32 v1, v2
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v0, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -1273,42 +1232,39 @@ define void @v_shuffle_v4f32_v3f32__1_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v3f32__1_0_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[4:5] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v3f32__1_0_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[4:5] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -1321,45 +1277,43 @@ define void @v_shuffle_v4f32_v3f32__2_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v3f32__2_0_0_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v3f32__2_0_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v3f32__2_0_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -1385,29 +1339,26 @@ define void @v_shuffle_v4f32_v3f32__3_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-LABEL: v_shuffle_v4f32_v3f32__3_0_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
+; GFX90A-NEXT: v_mov_b32_e32 v1, v2
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v3f32__3_0_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-NEXT: v_mov_b32_e32 v1, v2
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v0, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -1421,16 +1372,15 @@ define void @v_shuffle_v4f32_v3f32__4_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v3
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx4 v6, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1438,15 +1388,14 @@ define void @v_shuffle_v4f32_v3f32__4_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[4:5] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[2:3] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
@@ -1455,16 +1404,15 @@ define void @v_shuffle_v4f32_v3f32__4_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[4:5] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[2:3] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1480,16 +1428,15 @@ define void @v_shuffle_v4f32_v3f32__5_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1497,17 +1444,15 @@ define void @v_shuffle_v4f32_v3f32__5_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v0
-; GFX90A-NEXT: global_store_dwordx4 v5, v[6:9], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1515,17 +1460,15 @@ define void @v_shuffle_v4f32_v3f32__5_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v9, v0
-; GFX942-NEXT: global_store_dwordx4 v5, v[6:9], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -1540,49 +1483,44 @@ define void @v_shuffle_v4f32_v3f32__5_u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v3, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v3f32__5_u_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v5, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v3, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_u_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v5, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v3, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -1597,16 +1535,15 @@ define void @v_shuffle_v4f32_v3f32__5_1_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v5
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1614,17 +1551,15 @@ define void @v_shuffle_v4f32_v3f32__5_1_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v5
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v1
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v0
-; GFX90A-NEXT: global_store_dwordx4 v5, v[6:9], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1632,17 +1567,15 @@ define void @v_shuffle_v4f32_v3f32__5_1_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v9, v0
-; GFX942-NEXT: global_store_dwordx4 v5, v[6:9], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -1657,16 +1590,15 @@ define void @v_shuffle_v4f32_v3f32__5_2_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v6
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1676,15 +1608,13 @@ define void @v_shuffle_v4f32_v3f32__5_2_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v6
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v2
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1694,15 +1624,13 @@ define void @v_shuffle_v4f32_v3f32__5_2_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v6
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v2
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -1717,16 +1645,15 @@ define void @v_shuffle_v4f32_v3f32__5_3_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:6]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1734,17 +1661,15 @@ define void @v_shuffle_v4f32_v3f32__5_3_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v2
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v0
-; GFX90A-NEXT: global_store_dwordx4 v5, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1752,17 +1677,15 @@ define void @v_shuffle_v4f32_v3f32__5_3_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v2
-; GFX942-NEXT: v_mov_b32_e32 v9, v0
-; GFX942-NEXT: global_store_dwordx4 v5, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -1777,15 +1700,15 @@ define void @v_shuffle_v4f32_v3f32__5_4_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1793,16 +1716,15 @@ define void @v_shuffle_v4f32_v3f32__5_4_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1810,16 +1732,15 @@ define void @v_shuffle_v4f32_v3f32__5_4_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
+; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -1834,16 +1755,15 @@ define void @v_shuffle_v4f32_v3f32__5_5_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1851,17 +1771,15 @@ define void @v_shuffle_v4f32_v3f32__5_5_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v4
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v0
-; GFX90A-NEXT: global_store_dwordx4 v5, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1869,17 +1787,15 @@ define void @v_shuffle_v4f32_v3f32__5_5_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v4
-; GFX942-NEXT: v_mov_b32_e32 v9, v0
-; GFX942-NEXT: global_store_dwordx4 v5, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -1893,17 +1809,15 @@ define void @v_shuffle_v4f32_v3f32__5_5_u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v3f32__5_5_u_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[5:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1911,16 +1825,15 @@ define void @v_shuffle_v4f32_v3f32__5_5_u_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v4
-; GFX90A-NEXT: v_mov_b32_e32 v9, v0
-; GFX90A-NEXT: global_store_dwordx4 v5, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1928,16 +1841,15 @@ define void @v_shuffle_v4f32_v3f32__5_5_u_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v9, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v4
-; GFX942-NEXT: global_store_dwordx4 v5, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -1951,17 +1863,16 @@ define void @v_shuffle_v4f32_v3f32__5_5_1_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v3f32__5_5_1_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[5:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v6
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1969,16 +1880,15 @@ define void @v_shuffle_v4f32_v3f32__5_5_1_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v4
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[4:5], v[4:5] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1986,17 +1896,15 @@ define void @v_shuffle_v4f32_v3f32__5_5_1_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v4
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[4:5], v[4:5] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -2010,53 +1918,51 @@ define void @v_shuffle_v4f32_v3f32__5_5_2_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v3f32__5_5_2_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[5:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v7
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v3f32__5_5_2_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v9, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[6:8]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v8
; GFX90A-NEXT: v_mov_b32_e32 v5, v6
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v9, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_2_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v9, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[6:8]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v8
; GFX942-NEXT: v_mov_b32_e32 v5, v6
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v9, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -2070,53 +1976,51 @@ define void @v_shuffle_v4f32_v3f32__5_5_3_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v3f32__5_5_3_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:6]
+; GFX900-NEXT: ; def v[5:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v3f32__5_5_3_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v9, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[6:8]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v4
-; GFX90A-NEXT: v_mov_b32_e32 v8, v2
-; GFX90A-NEXT: v_mov_b32_e32 v9, v0
-; GFX90A-NEXT: global_store_dwordx4 v5, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v6
+; GFX90A-NEXT: global_store_dwordx4 v9, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_3_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v9, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[6:8]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v9, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v4
-; GFX942-NEXT: v_mov_b32_e32 v8, v2
-; GFX942-NEXT: global_store_dwordx4 v5, v[6:9], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v6
+; GFX942-NEXT: global_store_dwordx4 v9, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -2130,17 +2034,16 @@ define void @v_shuffle_v4f32_v3f32__5_5_4_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v3f32__5_5_4_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:6]
+; GFX900-NEXT: ; def v[5:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2148,16 +2051,15 @@ define void @v_shuffle_v4f32_v3f32__5_5_4_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v4
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[0:1], v[4:5] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2165,17 +2067,16 @@ define void @v_shuffle_v4f32_v3f32__5_5_4_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v4
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[0:1], v[4:5] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -2282,11 +2183,11 @@ define void @v_shuffle_v4f32_v3f32__1_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v1
; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2327,42 +2228,43 @@ define void @v_shuffle_v4f32_v3f32__2_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v3f32__2_1_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v3f32__2_1_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v3f32__2_1_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -2421,16 +2323,15 @@ define void @v_shuffle_v4f32_v3f32__4_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v3
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2480,16 +2381,15 @@ define void @v_shuffle_v4f32_v3f32__5_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2497,16 +2397,15 @@ define void @v_shuffle_v4f32_v3f32__5_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2514,17 +2413,15 @@ define void @v_shuffle_v4f32_v3f32__5_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -2539,15 +2436,14 @@ define void @v_shuffle_v4f32_v3f32__5_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2555,16 +2451,14 @@ define void @v_shuffle_v4f32_v3f32__5_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v3, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2572,17 +2466,14 @@ define void @v_shuffle_v4f32_v3f32__5_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v3, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -2597,15 +2488,14 @@ define void @v_shuffle_v4f32_v3f32__5_0_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2613,17 +2503,15 @@ define void @v_shuffle_v4f32_v3f32__5_0_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v1
-; GFX90A-NEXT: v_mov_b32_e32 v9, v1
-; GFX90A-NEXT: global_store_dwordx4 v5, v[6:9], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2631,17 +2519,15 @@ define void @v_shuffle_v4f32_v3f32__5_0_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v8, v1
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
-; GFX942-NEXT: global_store_dwordx4 v5, v[6:9], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -2656,16 +2542,15 @@ define void @v_shuffle_v4f32_v3f32__5_2_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v5
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:6]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2675,15 +2560,13 @@ define void @v_shuffle_v4f32_v3f32__5_2_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v6
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v2
-; GFX90A-NEXT: v_mov_b32_e32 v6, v1
-; GFX90A-NEXT: v_mov_b32_e32 v7, v1
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2693,15 +2576,13 @@ define void @v_shuffle_v4f32_v3f32__5_2_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v6
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v2
-; GFX942-NEXT: v_mov_b32_e32 v6, v1
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -2718,14 +2599,13 @@ define void @v_shuffle_v4f32_v3f32__5_3_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
-; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2733,17 +2613,15 @@ define void @v_shuffle_v4f32_v3f32__5_3_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v2
-; GFX90A-NEXT: v_mov_b32_e32 v8, v1
-; GFX90A-NEXT: v_mov_b32_e32 v9, v1
-; GFX90A-NEXT: global_store_dwordx4 v5, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2751,17 +2629,15 @@ define void @v_shuffle_v4f32_v3f32__5_3_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, v1
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v2
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
-; GFX942-NEXT: global_store_dwordx4 v5, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -2776,15 +2652,15 @@ define void @v_shuffle_v4f32_v3f32__5_4_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2792,15 +2668,15 @@ define void @v_shuffle_v4f32_v3f32__5_4_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2808,16 +2684,15 @@ define void @v_shuffle_v4f32_v3f32__5_4_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
+; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -2832,16 +2707,15 @@ define void @v_shuffle_v4f32_v3f32__5_5_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2849,17 +2723,15 @@ define void @v_shuffle_v4f32_v3f32__5_5_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v4
-; GFX90A-NEXT: v_mov_b32_e32 v8, v1
-; GFX90A-NEXT: v_mov_b32_e32 v9, v1
-; GFX90A-NEXT: global_store_dwordx4 v5, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2867,17 +2739,15 @@ define void @v_shuffle_v4f32_v3f32__5_5_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, v1
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v4
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
-; GFX942-NEXT: global_store_dwordx4 v5, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -2891,51 +2761,46 @@ define void @v_shuffle_v4f32_v3f32__5_5_u_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v3f32__5_5_u_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v3f32__5_5_u_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v4
-; GFX90A-NEXT: v_mov_b32_e32 v9, v1
-; GFX90A-NEXT: global_store_dwordx4 v5, v[6:9], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_u_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v4
-; GFX942-NEXT: global_store_dwordx4 v5, v[6:9], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -2949,52 +2814,46 @@ define void @v_shuffle_v4f32_v3f32__5_5_0_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v3f32__5_5_0_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v3f32__5_5_0_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v4
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v1
-; GFX90A-NEXT: global_store_dwordx4 v5, v[6:9], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_0_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v4
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
-; GFX942-NEXT: global_store_dwordx4 v5, v[6:9], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -3009,16 +2868,15 @@ define void @v_shuffle_v4f32_v3f32__5_5_2_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v6
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3028,15 +2886,13 @@ define void @v_shuffle_v4f32_v3f32__5_5_2_1(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v6
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v1
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3046,15 +2902,13 @@ define void @v_shuffle_v4f32_v3f32__5_5_2_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v6
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -3069,16 +2923,15 @@ define void @v_shuffle_v4f32_v3f32__5_5_3_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:6]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3086,17 +2939,15 @@ define void @v_shuffle_v4f32_v3f32__5_5_3_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v4
-; GFX90A-NEXT: v_mov_b32_e32 v8, v2
-; GFX90A-NEXT: v_mov_b32_e32 v9, v1
-; GFX90A-NEXT: global_store_dwordx4 v5, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3104,17 +2955,16 @@ define void @v_shuffle_v4f32_v3f32__5_5_3_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v4
-; GFX942-NEXT: v_mov_b32_e32 v8, v2
-; GFX942-NEXT: global_store_dwordx4 v5, v[6:9], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -3129,16 +2979,15 @@ define void @v_shuffle_v4f32_v3f32__5_5_4_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3146,17 +2995,15 @@ define void @v_shuffle_v4f32_v3f32__5_5_4_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v4
-; GFX90A-NEXT: v_mov_b32_e32 v8, v3
-; GFX90A-NEXT: v_mov_b32_e32 v9, v1
-; GFX90A-NEXT: global_store_dwordx4 v5, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3164,17 +3011,16 @@ define void @v_shuffle_v4f32_v3f32__5_5_4_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v4
-; GFX942-NEXT: v_mov_b32_e32 v8, v3
-; GFX942-NEXT: global_store_dwordx4 v5, v[6:9], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -3188,13 +3034,13 @@ define void @v_shuffle_v4f32_v3f32__u_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v3f32__u_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3278,14 +3124,13 @@ define void @v_shuffle_v4f32_v3f32__1_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v3f32__1_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3324,42 +3169,43 @@ define void @v_shuffle_v4f32_v3f32__2_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v3f32__2_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v3f32__2_2_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v3f32__2_2_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -3372,13 +3218,13 @@ define void @v_shuffle_v4f32_v3f32__3_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v3f32__3_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3418,16 +3264,15 @@ define void @v_shuffle_v4f32_v3f32__4_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[1:3]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v3
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3474,17 +3319,16 @@ define void @v_shuffle_v4f32_v3f32__5_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v3f32__5_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[1:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3492,16 +3336,15 @@ define void @v_shuffle_v4f32_v3f32__5_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3509,16 +3352,15 @@ define void @v_shuffle_v4f32_v3f32__5_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -3532,48 +3374,46 @@ define void @v_shuffle_v4f32_v3f32__5_u_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v3f32__5_u_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v3f32__5_u_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_u_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -3588,15 +3428,14 @@ define void @v_shuffle_v4f32_v3f32__5_0_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v5
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:6]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3606,15 +3445,14 @@ define void @v_shuffle_v4f32_v3f32__5_0_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_mov_b32_e32 v4, v6
+; GFX90A-NEXT: v_mov_b32_e32 v5, v6
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v2
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3624,15 +3462,14 @@ define void @v_shuffle_v4f32_v3f32__5_0_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_mov_b32_e32 v4, v6
+; GFX942-NEXT: v_mov_b32_e32 v5, v6
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v2
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -3646,48 +3483,46 @@ define void @v_shuffle_v4f32_v3f32__5_1_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v3f32__5_1_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v3f32__5_1_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_1_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -3704,14 +3539,13 @@ define void @v_shuffle_v4f32_v3f32__5_3_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3719,16 +3553,15 @@ define void @v_shuffle_v4f32_v3f32__5_3_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3736,16 +3569,15 @@ define void @v_shuffle_v4f32_v3f32__5_3_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v4
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -3760,15 +3592,15 @@ define void @v_shuffle_v4f32_v3f32__5_4_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3778,14 +3610,13 @@ define void @v_shuffle_v4f32_v3f32__5_4_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3795,14 +3626,13 @@ define void @v_shuffle_v4f32_v3f32__5_4_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
+; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -3817,16 +3647,15 @@ define void @v_shuffle_v4f32_v3f32__5_5_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3834,16 +3663,15 @@ define void @v_shuffle_v4f32_v3f32__5_5_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v6
; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3851,16 +3679,15 @@ define void @v_shuffle_v4f32_v3f32__5_5_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v6
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -3875,15 +3702,14 @@ define void @v_shuffle_v4f32_v3f32__5_5_u_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3891,16 +3717,15 @@ define void @v_shuffle_v4f32_v3f32__5_5_u_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v6
-; GFX90A-NEXT: v_mov_b32_e32 v7, v2
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3908,16 +3733,15 @@ define void @v_shuffle_v4f32_v3f32__5_5_u_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v6
-; GFX942-NEXT: v_mov_b32_e32 v7, v2
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -3932,16 +3756,15 @@ define void @v_shuffle_v4f32_v3f32__5_5_0_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[5:7]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v6
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3951,15 +3774,13 @@ define void @v_shuffle_v4f32_v3f32__5_5_0_2(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
; GFX90A-NEXT: v_mov_b32_e32 v5, v6
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: v_mov_b32_e32 v7, v2
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3969,15 +3790,13 @@ define void @v_shuffle_v4f32_v3f32__5_5_0_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
; GFX942-NEXT: v_mov_b32_e32 v5, v6
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v2
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -3992,15 +3811,14 @@ define void @v_shuffle_v4f32_v3f32__5_5_1_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:6]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4008,16 +3826,15 @@ define void @v_shuffle_v4f32_v3f32__5_5_1_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[2:3] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v6
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[4:5] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4025,16 +3842,16 @@ define void @v_shuffle_v4f32_v3f32__5_5_1_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[2:3] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v6
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[4:5] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -4049,53 +3866,49 @@ define void @v_shuffle_v4f32_v3f32__5_5_3_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:6]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v3f32__5_5_3_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, v6
-; GFX90A-NEXT: v_mov_b32_e32 v9, v6
-; GFX90A-NEXT: v_mov_b32_e32 v10, v4
-; GFX90A-NEXT: v_mov_b32_e32 v11, v2
-; GFX90A-NEXT: global_store_dwordx4 v3, v[8:11], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v6
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_3_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v6
-; GFX942-NEXT: v_mov_b32_e32 v9, v6
-; GFX942-NEXT: v_mov_b32_e32 v10, v4
-; GFX942-NEXT: v_mov_b32_e32 v11, v2
-; GFX942-NEXT: global_store_dwordx4 v3, v[8:11], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, v6
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -4110,16 +3923,15 @@ define void @v_shuffle_v4f32_v3f32__5_5_4_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4127,16 +3939,15 @@ define void @v_shuffle_v4f32_v3f32__5_5_4_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[2:3] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v6
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[0:1], v[4:5] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4144,17 +3955,16 @@ define void @v_shuffle_v4f32_v3f32__5_5_4_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[2:3] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v6
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[0:1], v[4:5] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -4218,12 +4028,11 @@ define void @v_shuffle_v4f32_v3f32__1_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v3f32__1_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v3, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4260,36 +4069,33 @@ define void @v_shuffle_v4f32_v3f32__2_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v3f32__2_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v3, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v3f32__2_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v3, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v3f32__2_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v3, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -4314,42 +4120,39 @@ define void @v_shuffle_v4f32_v3f32__4_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v3f32__4_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[4:5] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v3f32__4_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[4:5] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -4363,45 +4166,43 @@ define void @v_shuffle_v4f32_v3f32__5_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v3f32__5_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v3f32__5_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -4415,14 +4216,13 @@ define void @v_shuffle_v4f32_v3f32__5_u_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v3f32__5_u_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v3, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4464,15 +4264,15 @@ define void @v_shuffle_v4f32_v3f32__5_0_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4480,17 +4280,16 @@ define void @v_shuffle_v4f32_v3f32__5_0_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v2
-; GFX90A-NEXT: v_mov_b32_e32 v9, v2
-; GFX90A-NEXT: global_store_dwordx4 v5, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v0
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4498,17 +4297,16 @@ define void @v_shuffle_v4f32_v3f32__5_0_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v8, v2
-; GFX942-NEXT: v_mov_b32_e32 v9, v2
-; GFX942-NEXT: global_store_dwordx4 v5, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v0
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -4523,15 +4321,15 @@ define void @v_shuffle_v4f32_v3f32__5_1_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4539,16 +4337,15 @@ define void @v_shuffle_v4f32_v3f32__5_1_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v0
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4556,17 +4353,16 @@ define void @v_shuffle_v4f32_v3f32__5_1_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v0
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -4581,51 +4377,49 @@ define void @v_shuffle_v4f32_v3f32__5_2_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[1:3]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v3f32__5_2_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, v2
-; GFX90A-NEXT: v_mov_b32_e32 v8, v4
-; GFX90A-NEXT: v_mov_b32_e32 v9, v4
-; GFX90A-NEXT: global_store_dwordx4 v3, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v0
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_2_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v7, v2
-; GFX942-NEXT: v_mov_b32_e32 v8, v4
-; GFX942-NEXT: v_mov_b32_e32 v9, v4
-; GFX942-NEXT: global_store_dwordx4 v3, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v0
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -4641,43 +4435,41 @@ define void @v_shuffle_v4f32_v3f32__5_4_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v3f32__5_4_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v0
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_4_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v0
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -4693,43 +4485,41 @@ define void @v_shuffle_v4f32_v3f32__5_5_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v3f32__5_5_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v2
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v0
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v2
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v0
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -4743,43 +4533,40 @@ define void @v_shuffle_v4f32_v3f32__5_5_u_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v3f32__5_5_u_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v3f32__5_5_u_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v0
+; GFX90A-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_u_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v0
+; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -4794,15 +4581,15 @@ define void @v_shuffle_v4f32_v3f32__5_5_0_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4810,17 +4597,15 @@ define void @v_shuffle_v4f32_v3f32__5_5_0_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v4
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v2
-; GFX90A-NEXT: global_store_dwordx4 v5, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v0
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4828,17 +4613,16 @@ define void @v_shuffle_v4f32_v3f32__5_5_0_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v4
-; GFX942-NEXT: v_mov_b32_e32 v9, v2
-; GFX942-NEXT: global_store_dwordx4 v5, v[6:9], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v0
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -4853,15 +4637,15 @@ define void @v_shuffle_v4f32_v3f32__5_5_1_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4869,16 +4653,15 @@ define void @v_shuffle_v4f32_v3f32__5_5_1_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[2:3] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v4
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[0:1] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4886,17 +4669,16 @@ define void @v_shuffle_v4f32_v3f32__5_5_1_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[2:3] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v4
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[0:1] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -4911,15 +4693,15 @@ define void @v_shuffle_v4f32_v3f32__5_5_2_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4927,16 +4709,15 @@ define void @v_shuffle_v4f32_v3f32__5_5_2_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v0
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4944,17 +4725,16 @@ define void @v_shuffle_v4f32_v3f32__5_5_2_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v0
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -4970,40 +4750,39 @@ define void @v_shuffle_v4f32_v3f32__5_5_4_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v3f32__5_5_4_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[2:3] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v4
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[0:1], v[0:1] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_4_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[2:3] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v4
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[0:1], v[0:1] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -5120,16 +4899,15 @@ define void @v_shuffle_v4f32_v3f32__1_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v3
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5179,16 +4957,15 @@ define void @v_shuffle_v4f32_v3f32__2_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5196,16 +4973,15 @@ define void @v_shuffle_v4f32_v3f32__2_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v5
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5213,17 +4989,15 @@ define void @v_shuffle_v4f32_v3f32__2_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -5286,11 +5060,11 @@ define void @v_shuffle_v4f32_v3f32__4_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v1
; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5332,42 +5106,43 @@ define void @v_shuffle_v4f32_v3f32__5_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v3f32__5_4_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v3f32__5_4_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_4_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -5381,13 +5156,13 @@ define void @v_shuffle_v4f32_v3f32__5_u_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v3f32__5_u_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v3, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5429,15 +5204,15 @@ define void @v_shuffle_v4f32_v3f32__5_0_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5445,17 +5220,16 @@ define void @v_shuffle_v4f32_v3f32__5_0_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v3
-; GFX90A-NEXT: v_mov_b32_e32 v9, v3
-; GFX90A-NEXT: global_store_dwordx4 v5, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5463,17 +5237,16 @@ define void @v_shuffle_v4f32_v3f32__5_0_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v8, v3
-; GFX942-NEXT: v_mov_b32_e32 v9, v3
-; GFX942-NEXT: global_store_dwordx4 v5, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -5488,15 +5261,15 @@ define void @v_shuffle_v4f32_v3f32__5_1_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5504,15 +5277,15 @@ define void @v_shuffle_v4f32_v3f32__5_1_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5520,16 +5293,16 @@ define void @v_shuffle_v4f32_v3f32__5_1_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -5544,51 +5317,49 @@ define void @v_shuffle_v4f32_v3f32__5_2_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[1:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v3f32__5_2_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, v2
-; GFX90A-NEXT: v_mov_b32_e32 v8, v5
-; GFX90A-NEXT: v_mov_b32_e32 v9, v5
-; GFX90A-NEXT: global_store_dwordx4 v3, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_2_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v7, v2
-; GFX942-NEXT: v_mov_b32_e32 v8, v5
-; GFX942-NEXT: v_mov_b32_e32 v9, v5
-; GFX942-NEXT: global_store_dwordx4 v3, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -5602,44 +5373,43 @@ define void @v_shuffle_v4f32_v3f32__5_3_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v3f32__5_3_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v3f32__5_3_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v1
-; GFX90A-NEXT: v_mov_b32_e32 v7, v1
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_3_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v1
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -5653,45 +5423,43 @@ define void @v_shuffle_v4f32_v3f32__5_5_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v3f32__5_5_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v3f32__5_5_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v2
-; GFX90A-NEXT: v_mov_b32_e32 v6, v1
-; GFX90A-NEXT: v_mov_b32_e32 v7, v1
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v2
-; GFX942-NEXT: v_mov_b32_e32 v6, v1
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -5705,43 +5473,40 @@ define void @v_shuffle_v4f32_v3f32__5_5_u_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v3f32__5_5_u_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v3f32__5_5_u_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v1
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_u_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -5756,16 +5521,15 @@ define void @v_shuffle_v4f32_v3f32__5_5_0_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5773,17 +5537,15 @@ define void @v_shuffle_v4f32_v3f32__5_5_0_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v4
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v3
-; GFX90A-NEXT: global_store_dwordx4 v5, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5791,17 +5553,16 @@ define void @v_shuffle_v4f32_v3f32__5_5_0_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v4
-; GFX942-NEXT: v_mov_b32_e32 v9, v3
-; GFX942-NEXT: global_store_dwordx4 v5, v[6:9], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -5816,16 +5577,15 @@ define void @v_shuffle_v4f32_v3f32__5_5_1_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5833,17 +5593,16 @@ define void @v_shuffle_v4f32_v3f32__5_5_1_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v4
-; GFX90A-NEXT: v_mov_b32_e32 v8, v1
-; GFX90A-NEXT: v_mov_b32_e32 v9, v3
-; GFX90A-NEXT: global_store_dwordx4 v5, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5851,17 +5610,16 @@ define void @v_shuffle_v4f32_v3f32__5_5_1_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, v1
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v4
-; GFX942-NEXT: v_mov_b32_e32 v9, v3
-; GFX942-NEXT: global_store_dwordx4 v5, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -5876,16 +5634,15 @@ define void @v_shuffle_v4f32_v3f32__5_5_2_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5893,16 +5650,15 @@ define void @v_shuffle_v4f32_v3f32__5_5_2_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5910,17 +5666,16 @@ define void @v_shuffle_v4f32_v3f32__5_5_2_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -5934,44 +5689,43 @@ define void @v_shuffle_v4f32_v3f32__5_5_3_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v3f32__5_5_3_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v3f32__5_5_3_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v2
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: v_mov_b32_e32 v7, v1
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_3_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v2
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -5985,13 +5739,13 @@ define void @v_shuffle_v4f32_v3f32__u_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v3f32__u_5_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6090,16 +5844,15 @@ define void @v_shuffle_v4f32_v3f32__1_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v4
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6147,16 +5900,15 @@ define void @v_shuffle_v4f32_v3f32__2_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v5
+; GFX900-NEXT: v_mov_b32_e32 v4, v5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6164,16 +5916,16 @@ define void @v_shuffle_v4f32_v3f32__2_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v6
+; GFX90A-NEXT: v_mov_b32_e32 v4, v6
+; GFX90A-NEXT: v_mov_b32_e32 v5, v6
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6181,17 +5933,16 @@ define void @v_shuffle_v4f32_v3f32__2_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v6
+; GFX942-NEXT: v_mov_b32_e32 v4, v6
+; GFX942-NEXT: v_mov_b32_e32 v5, v6
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -6251,14 +6002,13 @@ define void @v_shuffle_v4f32_v3f32__4_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v3f32__4_5_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6298,39 +6048,40 @@ define void @v_shuffle_v4f32_v3f32__5_u_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v3f32__5_u_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v3, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v3f32__5_u_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v2
+; GFX90A-NEXT: global_store_dwordx4 v3, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_u_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v2
+; GFX942-NEXT: global_store_dwordx4 v3, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -6345,16 +6096,15 @@ define void @v_shuffle_v4f32_v3f32__5_0_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6362,16 +6112,16 @@ define void @v_shuffle_v4f32_v3f32__5_0_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v2
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6379,16 +6129,16 @@ define void @v_shuffle_v4f32_v3f32__5_0_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v2
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -6403,16 +6153,15 @@ define void @v_shuffle_v4f32_v3f32__5_1_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6420,16 +6169,15 @@ define void @v_shuffle_v4f32_v3f32__5_1_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6437,17 +6185,16 @@ define void @v_shuffle_v4f32_v3f32__5_1_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -6464,14 +6211,13 @@ define void @v_shuffle_v4f32_v3f32__5_2_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[1:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6481,14 +6227,14 @@ define void @v_shuffle_v4f32_v3f32__5_2_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6498,15 +6244,14 @@ define void @v_shuffle_v4f32_v3f32__5_2_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -6520,44 +6265,43 @@ define void @v_shuffle_v4f32_v3f32__5_3_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v3f32__5_3_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v3f32__5_3_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v2
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v5, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_3_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v2
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -6571,39 +6315,43 @@ define void @v_shuffle_v4f32_v3f32__5_4_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v3f32__5_4_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v3f32__5_4_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_4_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -6617,42 +6365,40 @@ define void @v_shuffle_v4f32_v3f32__5_5_u_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v3f32__5_5_u_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v3f32__5_5_u_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
; GFX90A-NEXT: v_mov_b32_e32 v5, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v2
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_u_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
; GFX942-NEXT: v_mov_b32_e32 v5, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v2
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -6667,16 +6413,15 @@ define void @v_shuffle_v4f32_v3f32__5_5_0_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:6]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6684,17 +6429,15 @@ define void @v_shuffle_v4f32_v3f32__5_5_0_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v4
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v4
-; GFX90A-NEXT: global_store_dwordx4 v5, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v2
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6702,17 +6445,16 @@ define void @v_shuffle_v4f32_v3f32__5_5_0_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v4
-; GFX942-NEXT: v_mov_b32_e32 v9, v4
-; GFX942-NEXT: global_store_dwordx4 v5, v[6:9], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v2
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -6727,16 +6469,15 @@ define void @v_shuffle_v4f32_v3f32__5_5_1_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6746,14 +6487,13 @@ define void @v_shuffle_v4f32_v3f32__5_5_1_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_pk_mov_b32 v[6:7], v[2:3], v[2:3] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v2
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6763,15 +6503,14 @@ define void @v_shuffle_v4f32_v3f32__5_5_1_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[6:7], v[2:3], v[2:3] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v2
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -6786,16 +6525,15 @@ define void @v_shuffle_v4f32_v3f32__5_5_2_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6803,16 +6541,15 @@ define void @v_shuffle_v4f32_v3f32__5_5_2_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v6
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6820,17 +6557,16 @@ define void @v_shuffle_v4f32_v3f32__5_5_2_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v6
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -6844,45 +6580,43 @@ define void @v_shuffle_v4f32_v3f32__5_5_3_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v3f32__5_5_3_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v3f32__5_5_3_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
; GFX90A-NEXT: v_mov_b32_e32 v5, v2
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: v_mov_b32_e32 v7, v2
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_3_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
; GFX942-NEXT: v_mov_b32_e32 v5, v2
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v2
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
@@ -6896,42 +6630,41 @@ define void @v_shuffle_v4f32_v3f32__5_5_4_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v3f32__5_5_4_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v3f32__5_5_4_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_pk_mov_b32 v[6:7], v[0:1], v[2:3] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v2
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[0:1], v[2:3] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v3f32__5_5_4_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[6:7], v[0:1], v[2:3] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v2
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[0:1], v[2:3] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x float> asm "; def $0", "=v"()
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4f32.v4f32.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4f32.v4f32.ll
index d5bd41397c4f0..5fdd57da3dab7 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4f32.v4f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4f32.v4f32.ll
@@ -58,12 +58,11 @@ define void @v_shuffle_v4f32_v4f32__1_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__1_u_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v4, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -100,36 +99,33 @@ define void @v_shuffle_v4f32_v4f32__2_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__2_u_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v4f32__2_u_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v4f32__2_u_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -142,12 +138,11 @@ define void @v_shuffle_v4f32_v4f32__3_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__3_u_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v4, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -195,12 +190,11 @@ define void @v_shuffle_v4f32_v4f32__5_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__5_u_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v4, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -238,36 +232,33 @@ define void @v_shuffle_v4f32_v4f32__6_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__6_u_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v4f32__6_u_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v4f32__6_u_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -281,12 +272,11 @@ define void @v_shuffle_v4f32_v4f32__7_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_u_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v4, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -324,16 +314,14 @@ define void @v_shuffle_v4f32_v4f32__7_0_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_0_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -378,15 +366,14 @@ define void @v_shuffle_v4f32_v4f32__7_1_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_1_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -431,16 +418,14 @@ define void @v_shuffle_v4f32_v4f32__7_2_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_2_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -485,16 +470,14 @@ define void @v_shuffle_v4f32_v4f32__7_3_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_3_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v5, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -508,9 +491,8 @@ define void @v_shuffle_v4f32_v4f32__7_3_u_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v7
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -524,9 +506,9 @@ define void @v_shuffle_v4f32_v4f32__7_3_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v7
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -540,13 +522,12 @@ define void @v_shuffle_v4f32_v4f32__7_4_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_4_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: global_store_dwordx4 v4, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx4 v5, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -584,12 +565,12 @@ define void @v_shuffle_v4f32_v4f32__7_5_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_5_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx4 v5, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -627,13 +608,12 @@ define void @v_shuffle_v4f32_v4f32__7_6_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_6_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx4 v5, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -671,13 +651,12 @@ define void @v_shuffle_v4f32_v4f32__7_7_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_7_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx4 v5, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -688,9 +667,8 @@ define void @v_shuffle_v4f32_v4f32__7_7_u_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v3
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -701,9 +679,8 @@ define void @v_shuffle_v4f32_v4f32__7_7_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v3
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -717,17 +694,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_0_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_7_0_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v9, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -737,14 +712,12 @@ define void @v_shuffle_v4f32_v4f32__7_7_0_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -754,15 +727,12 @@ define void @v_shuffle_v4f32_v4f32__7_7_0_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -777,16 +747,14 @@ define void @v_shuffle_v4f32_v4f32__7_7_1_u(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -794,16 +762,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_1_u(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -811,17 +778,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_1_u(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -836,15 +801,14 @@ define void @v_shuffle_v4f32_v4f32__7_7_2_u(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -852,15 +816,14 @@ define void @v_shuffle_v4f32_v4f32__7_7_2_u(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -868,16 +831,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_2_u(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -892,16 +854,14 @@ define void @v_shuffle_v4f32_v4f32__7_7_3_u(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx4 v6, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -909,16 +869,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_3_u(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -926,16 +885,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_3_u(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -949,43 +907,39 @@ define void @v_shuffle_v4f32_v4f32__7_7_4_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_7_4_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx4 v4, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v6, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v4f32__7_7_4_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v3
-; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_4_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v3
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: global_store_dwordx4 v5, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -999,42 +953,39 @@ define void @v_shuffle_v4f32_v4f32__7_7_5_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_7_5_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v4, v1
-; GFX900-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v4f32__7_7_5_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v3
-; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: v_mov_b32_e32 v8, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_5_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v3
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: v_mov_b32_e32 v8, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: global_store_dwordx4 v5, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -1048,13 +999,13 @@ define void @v_shuffle_v4f32_v4f32__7_7_6_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_7_6_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v6, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1094,14 +1045,13 @@ define void @v_shuffle_v4f32_v4f32__7_7_7_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_7_7_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v6, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1111,11 +1061,10 @@ define void @v_shuffle_v4f32_v4f32__7_7_7_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v3
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1125,11 +1074,10 @@ define void @v_shuffle_v4f32_v4f32__7_7_7_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v3
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: global_store_dwordx4 v5, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -1143,18 +1091,16 @@ define void @v_shuffle_v4f32_v4f32__7_7_7_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_7_7_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[6:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v10, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1162,16 +1108,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_7_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[0:1] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v0, v5
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[4:5] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1179,17 +1124,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_7_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[0:1] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v0, v5
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[4:5] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -1204,17 +1147,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_7_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v5
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v9, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1222,17 +1163,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_7_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v5
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1240,18 +1179,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_7_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v5
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -1266,17 +1202,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_7_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v6
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v6
-; GFX900-NEXT: v_mov_b32_e32 v6, v2
-; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1284,16 +1218,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_7_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[6:7], v[2:3] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[4:5] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1301,17 +1234,16 @@ define void @v_shuffle_v4f32_v4f32__7_7_7_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[6:7], v[2:3] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[4:5] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -1326,16 +1258,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_7_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1343,16 +1274,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_7_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: v_mov_b32_e32 v2, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1360,17 +1290,16 @@ define void @v_shuffle_v4f32_v4f32__7_7_7_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: v_mov_b32_e32 v2, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -1384,14 +1313,14 @@ define void @v_shuffle_v4f32_v4f32__7_7_7_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_7_7_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1431,46 +1360,42 @@ define void @v_shuffle_v4f32_v4f32__7_7_7_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_7_7_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v4, v3
; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: v_mov_b32_e32 v6, v3
-; GFX900-NEXT: v_mov_b32_e32 v7, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v1
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v4f32__7_7_7_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v3
-; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: v_mov_b32_e32 v8, v3
-; GFX90A-NEXT: v_mov_b32_e32 v9, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_7_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v3
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: v_mov_b32_e32 v8, v3
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -1484,15 +1409,14 @@ define void @v_shuffle_v4f32_v4f32__7_7_7_6(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_7_7_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v4, v3
; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: v_mov_b32_e32 v6, v3
-; GFX900-NEXT: v_mov_b32_e32 v7, v2
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1532,14 +1456,14 @@ define void @v_shuffle_v4f32_v4f32__7_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_7_7_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v3
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1549,11 +1473,11 @@ define void @v_shuffle_v4f32_v4f32__7_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v3
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1563,11 +1487,11 @@ define void @v_shuffle_v4f32_v4f32__7_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v3
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -1582,13 +1506,12 @@ define void @v_shuffle_v4f32_v4f32__u_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1596,13 +1519,12 @@ define void @v_shuffle_v4f32_v4f32__u_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v0
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
+; GFX90A-NEXT: v_mov_b32_e32 v1, v2
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1610,13 +1532,12 @@ define void @v_shuffle_v4f32_v4f32__u_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v0
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-NEXT: v_mov_b32_e32 v1, v2
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v0, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -1692,13 +1613,12 @@ define void @v_shuffle_v4f32_v4f32__1_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1706,13 +1626,12 @@ define void @v_shuffle_v4f32_v4f32__1_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -1728,43 +1647,39 @@ define void @v_shuffle_v4f32_v4f32__2_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: v_mov_b32_e32 v3, v0
; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v4f32__2_0_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v0
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v0
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v4f32__2_0_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v9, v0
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v0
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -1777,15 +1692,14 @@ define void @v_shuffle_v4f32_v4f32__3_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__3_0_0_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1793,13 +1707,12 @@ define void @v_shuffle_v4f32_v4f32__3_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[2:3] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1807,13 +1720,12 @@ define void @v_shuffle_v4f32_v4f32__3_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[2:3] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -1827,13 +1739,12 @@ define void @v_shuffle_v4f32_v4f32__4_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1841,13 +1752,12 @@ define void @v_shuffle_v4f32_v4f32__4_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v0
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
+; GFX90A-NEXT: v_mov_b32_e32 v1, v2
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1855,13 +1765,12 @@ define void @v_shuffle_v4f32_v4f32__4_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v0
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-NEXT: v_mov_b32_e32 v1, v2
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v0, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -1877,15 +1786,13 @@ define void @v_shuffle_v4f32_v4f32__5_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v4
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
+; GFX900-NEXT: global_store_dwordx4 v8, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1893,16 +1800,15 @@ define void @v_shuffle_v4f32_v4f32__5_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[2:3] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1910,17 +1816,16 @@ define void @v_shuffle_v4f32_v4f32__5_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[2:3] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -1935,17 +1840,15 @@ define void @v_shuffle_v4f32_v4f32__6_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1953,17 +1856,15 @@ define void @v_shuffle_v4f32_v4f32__6_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1971,18 +1872,15 @@ define void @v_shuffle_v4f32_v4f32__6_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v0
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -1997,17 +1895,15 @@ define void @v_shuffle_v4f32_v4f32__7_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2015,16 +1911,15 @@ define void @v_shuffle_v4f32_v4f32__7_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[0:1] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[2:3] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2032,17 +1927,16 @@ define void @v_shuffle_v4f32_v4f32__7_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[0:1] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[2:3] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -2057,16 +1951,14 @@ define void @v_shuffle_v4f32_v4f32__7_u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v4, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2074,16 +1966,15 @@ define void @v_shuffle_v4f32_v4f32__7_u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v1, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v7
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v1, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2091,17 +1982,15 @@ define void @v_shuffle_v4f32_v4f32__7_u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v1, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v0, v7
+; GFX942-NEXT: global_store_dwordx4 v1, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -2116,17 +2005,15 @@ define void @v_shuffle_v4f32_v4f32__7_1_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v6
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v0
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v9, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2134,17 +2021,16 @@ define void @v_shuffle_v4f32_v4f32__7_1_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v7
+; GFX90A-NEXT: v_mov_b32_e32 v1, v3
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2152,18 +2038,16 @@ define void @v_shuffle_v4f32_v4f32__7_1_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: v_mov_b32_e32 v0, v7
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -2178,16 +2062,15 @@ define void @v_shuffle_v4f32_v4f32__7_2_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v7
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v7, v[1:4], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v9, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2195,16 +2078,15 @@ define void @v_shuffle_v4f32_v4f32__7_2_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[6:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[6:7], v[2:3] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[8:9], v[4:5] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2212,16 +2094,15 @@ define void @v_shuffle_v4f32_v4f32__7_2_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[6:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[6:7], v[2:3] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[8:9], v[4:5] op_sel:[1,0]
+; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -2236,17 +2117,15 @@ define void @v_shuffle_v4f32_v4f32__7_3_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v8
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v8, v[1:4], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v9, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2295,17 +2174,15 @@ define void @v_shuffle_v4f32_v4f32__7_4_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v0
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
+; GFX900-NEXT: global_store_dwordx4 v9, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2313,16 +2190,15 @@ define void @v_shuffle_v4f32_v4f32__7_4_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[2:3] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[4:5] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2330,17 +2206,15 @@ define void @v_shuffle_v4f32_v4f32__7_4_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[2:3] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[4:5] op_sel:[1,0]
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -2355,16 +2229,15 @@ define void @v_shuffle_v4f32_v4f32__7_5_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
+; GFX900-NEXT: global_store_dwordx4 v9, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2414,17 +2287,15 @@ define void @v_shuffle_v4f32_v4f32__7_6_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
+; GFX900-NEXT: global_store_dwordx4 v9, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2432,16 +2303,15 @@ define void @v_shuffle_v4f32_v4f32__7_6_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[4:5] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[6:7] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2449,17 +2319,15 @@ define void @v_shuffle_v4f32_v4f32__7_6_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[4:5] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[6:7] op_sel:[1,0]
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -2474,17 +2342,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
+; GFX900-NEXT: global_store_dwordx4 v9, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2492,17 +2358,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2510,18 +2374,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -2535,17 +2396,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_7_u_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[6:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v5, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2553,16 +2412,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_u_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2570,17 +2428,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_u_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -2594,18 +2450,16 @@ define void @v_shuffle_v4f32_v4f32__7_7_1_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_7_1_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[6:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v1
-; GFX900-NEXT: v_mov_b32_e32 v5, v0
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2613,16 +2467,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_1_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v0, v5
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[4:5], v[4:5] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2630,17 +2483,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_1_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v0, v5
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[4:5], v[4:5] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -2654,18 +2505,16 @@ define void @v_shuffle_v4f32_v4f32__7_7_2_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_7_2_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[6:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v6
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v2
-; GFX900-NEXT: v_mov_b32_e32 v6, v0
-; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v8
+; GFX900-NEXT: global_store_dwordx4 v10, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2673,17 +2522,16 @@ define void @v_shuffle_v4f32_v4f32__7_7_2_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[6:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v7
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v8
+; GFX90A-NEXT: v_mov_b32_e32 v5, v6
+; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2691,17 +2539,16 @@ define void @v_shuffle_v4f32_v4f32__7_7_2_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[6:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, v7
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v8
+; GFX942-NEXT: v_mov_b32_e32 v5, v6
+; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -2715,17 +2562,16 @@ define void @v_shuffle_v4f32_v4f32__7_7_3_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_7_3_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[6:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v7
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v8, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v9
+; GFX900-NEXT: global_store_dwordx4 v10, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2733,16 +2579,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_3_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[6:7], v[4:5] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2750,16 +2595,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_3_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[6:7], v[4:5] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -2773,18 +2617,16 @@ define void @v_shuffle_v4f32_v4f32__7_7_4_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_7_4_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[6:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: v_mov_b32_e32 v4, v1
-; GFX900-NEXT: v_mov_b32_e32 v5, v0
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v10, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2794,15 +2636,14 @@ define void @v_shuffle_v4f32_v4f32__7_7_4_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[6:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, v5
-; GFX90A-NEXT: v_mov_b32_e32 v9, v5
-; GFX90A-NEXT: v_mov_b32_e32 v10, v2
-; GFX90A-NEXT: v_mov_b32_e32 v11, v0
-; GFX90A-NEXT: global_store_dwordx4 v6, v[8:11], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v6
+; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2812,15 +2653,14 @@ define void @v_shuffle_v4f32_v4f32__7_7_4_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[6:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v11, v0
-; GFX942-NEXT: v_mov_b32_e32 v8, v5
-; GFX942-NEXT: v_mov_b32_e32 v9, v5
-; GFX942-NEXT: v_mov_b32_e32 v10, v2
-; GFX942-NEXT: global_store_dwordx4 v6, v[8:11], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v6
+; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -2834,17 +2674,16 @@ define void @v_shuffle_v4f32_v4f32__7_7_5_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_7_5_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[6:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: v_mov_b32_e32 v5, v2
-; GFX900-NEXT: v_mov_b32_e32 v6, v0
-; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v10, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2852,16 +2691,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_5_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v0, v5
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[0:1], v[4:5] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2869,17 +2707,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_5_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v0, v5
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[0:1], v[4:5] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -2893,17 +2729,16 @@ define void @v_shuffle_v4f32_v4f32__7_7_6_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_7_6_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[6:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v10, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3045,11 +2880,11 @@ define void @v_shuffle_v4f32_v4f32__1_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v1
; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3093,11 +2928,11 @@ define void @v_shuffle_v4f32_v4f32__2_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3107,11 +2942,11 @@ define void @v_shuffle_v4f32_v4f32__2_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3121,11 +2956,11 @@ define void @v_shuffle_v4f32_v4f32__2_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -3138,14 +2973,14 @@ define void @v_shuffle_v4f32_v4f32__3_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__3_1_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: v_mov_b32_e32 v6, v1
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3232,16 +3067,15 @@ define void @v_shuffle_v4f32_v4f32__5_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v4
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
+; GFX900-NEXT: global_store_dwordx4 v7, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3291,16 +3125,15 @@ define void @v_shuffle_v4f32_v4f32__6_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3308,16 +3141,15 @@ define void @v_shuffle_v4f32_v4f32__6_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v5
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3325,17 +3157,15 @@ define void @v_shuffle_v4f32_v4f32__6_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -3350,16 +3180,15 @@ define void @v_shuffle_v4f32_v4f32__7_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3409,16 +3238,14 @@ define void @v_shuffle_v4f32_v4f32__7_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3426,16 +3253,15 @@ define void @v_shuffle_v4f32_v4f32__7_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v1, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v5
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v7
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx4 v1, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3443,17 +3269,15 @@ define void @v_shuffle_v4f32_v4f32__7_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v1, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v5
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v0, v7
+; GFX942-NEXT: global_store_dwordx4 v1, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -3468,17 +3292,14 @@ define void @v_shuffle_v4f32_v4f32__7_0_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: v_mov_b32_e32 v4, v1
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3486,16 +3307,15 @@ define void @v_shuffle_v4f32_v4f32__7_0_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[0:1] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v4, v1
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[2:3] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3503,17 +3323,16 @@ define void @v_shuffle_v4f32_v4f32__7_0_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[0:1] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v4, v1
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[2:3] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -3528,17 +3347,15 @@ define void @v_shuffle_v4f32_v4f32__7_2_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v6
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v6
-; GFX900-NEXT: v_mov_b32_e32 v4, v2
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: v_mov_b32_e32 v6, v1
-; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3546,16 +3363,15 @@ define void @v_shuffle_v4f32_v4f32__7_2_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[6:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[6:7], v[2:3] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v4, v1
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[8:9], v[4:5] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3563,16 +3379,15 @@ define void @v_shuffle_v4f32_v4f32__7_2_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[6:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[6:7], v[2:3] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v4, v1
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[8:9], v[4:5] op_sel:[1,0]
+; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -3587,16 +3402,15 @@ define void @v_shuffle_v4f32_v4f32__7_3_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v7
-; GFX900-NEXT: v_mov_b32_e32 v4, v1
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v7
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3645,17 +3459,15 @@ define void @v_shuffle_v4f32_v4f32__7_4_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v2
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: v_mov_b32_e32 v6, v1
-; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3663,16 +3475,15 @@ define void @v_shuffle_v4f32_v4f32__7_4_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[2:3] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v4, v1
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[4:5] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3680,17 +3491,15 @@ define void @v_shuffle_v4f32_v4f32__7_4_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[2:3] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v4, v1
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[4:5] op_sel:[1,0]
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -3705,16 +3514,15 @@ define void @v_shuffle_v4f32_v4f32__7_5_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
; GFX900-NEXT: v_mov_b32_e32 v4, v1
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3764,17 +3572,15 @@ define void @v_shuffle_v4f32_v4f32__7_6_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: v_mov_b32_e32 v4, v1
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3782,16 +3588,15 @@ define void @v_shuffle_v4f32_v4f32__7_6_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[4:5] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v4, v1
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[6:7] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3799,17 +3604,15 @@ define void @v_shuffle_v4f32_v4f32__7_6_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[4:5] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v4, v1
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[6:7] op_sel:[1,0]
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -3824,17 +3627,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v1
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3842,17 +3643,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v1
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3860,18 +3659,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v1
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -3885,17 +3681,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_u_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_7_u_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v9, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3905,14 +3699,12 @@ define void @v_shuffle_v4f32_v4f32__7_7_u_1(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3922,15 +3714,12 @@ define void @v_shuffle_v4f32_v4f32__7_7_u_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -3944,18 +3733,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_0_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_7_0_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v9, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3965,15 +3751,12 @@ define void @v_shuffle_v4f32_v4f32__7_7_0_1(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3983,16 +3766,12 @@ define void @v_shuffle_v4f32_v4f32__7_7_0_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -4007,17 +3786,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_2_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v6
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v2
-; GFX900-NEXT: v_mov_b32_e32 v6, v1
-; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v7
+; GFX900-NEXT: global_store_dwordx4 v9, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4027,15 +3804,13 @@ define void @v_shuffle_v4f32_v4f32__7_7_2_1(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v7
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v6
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4045,15 +3820,13 @@ define void @v_shuffle_v4f32_v4f32__7_7_2_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, v7
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v6
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -4068,17 +3841,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_3_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v7
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: v_mov_b32_e32 v6, v3
-; GFX900-NEXT: v_mov_b32_e32 v7, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v8
+; GFX900-NEXT: global_store_dwordx4 v9, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4088,15 +3859,13 @@ define void @v_shuffle_v4f32_v4f32__7_7_3_1(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
; GFX90A-NEXT: v_mov_b32_e32 v4, v7
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: v_mov_b32_e32 v6, v3
-; GFX90A-NEXT: v_mov_b32_e32 v7, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4106,15 +3875,13 @@ define void @v_shuffle_v4f32_v4f32__7_7_3_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
; GFX942-NEXT: v_mov_b32_e32 v4, v7
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: v_mov_b32_e32 v6, v3
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -4129,17 +3896,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_4_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v5
-; GFX900-NEXT: v_mov_b32_e32 v5, v2
-; GFX900-NEXT: v_mov_b32_e32 v6, v1
-; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v9, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4147,17 +3912,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_4_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, v5
-; GFX90A-NEXT: v_mov_b32_e32 v9, v5
-; GFX90A-NEXT: v_mov_b32_e32 v10, v2
-; GFX90A-NEXT: v_mov_b32_e32 v11, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[8:11], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4165,17 +3928,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_4_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v11, v1
-; GFX942-NEXT: v_mov_b32_e32 v8, v5
-; GFX942-NEXT: v_mov_b32_e32 v9, v5
-; GFX942-NEXT: v_mov_b32_e32 v10, v2
-; GFX942-NEXT: global_store_dwordx4 v6, v[8:11], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -4190,16 +3951,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_5_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v5
-; GFX900-NEXT: v_mov_b32_e32 v6, v3
-; GFX900-NEXT: v_mov_b32_e32 v7, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v9, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4207,17 +3967,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_5_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, v5
-; GFX90A-NEXT: v_mov_b32_e32 v9, v5
-; GFX90A-NEXT: v_mov_b32_e32 v10, v3
-; GFX90A-NEXT: v_mov_b32_e32 v11, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[8:11], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4225,17 +3983,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_5_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v11, v1
-; GFX942-NEXT: v_mov_b32_e32 v8, v5
-; GFX942-NEXT: v_mov_b32_e32 v9, v5
-; GFX942-NEXT: v_mov_b32_e32 v10, v3
-; GFX942-NEXT: global_store_dwordx4 v6, v[8:11], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -4250,16 +4006,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_6_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v9, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4311,10 +4066,10 @@ define void @v_shuffle_v4f32_v4f32__u_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4401,11 +4156,10 @@ define void @v_shuffle_v4f32_v4f32__1_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4447,11 +4201,11 @@ define void @v_shuffle_v4f32_v4f32__2_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4461,11 +4215,11 @@ define void @v_shuffle_v4f32_v4f32__2_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4475,11 +4229,11 @@ define void @v_shuffle_v4f32_v4f32__2_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -4492,14 +4246,14 @@ define void @v_shuffle_v4f32_v4f32__3_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__3_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4541,10 +4295,10 @@ define void @v_shuffle_v4f32_v4f32__4_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4584,16 +4338,15 @@ define void @v_shuffle_v4f32_v4f32__5_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v4
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
+; GFX900-NEXT: global_store_dwordx4 v6, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4641,16 +4394,15 @@ define void @v_shuffle_v4f32_v4f32__6_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4658,16 +4410,15 @@ define void @v_shuffle_v4f32_v4f32__6_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4675,16 +4426,15 @@ define void @v_shuffle_v4f32_v4f32__6_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -4699,16 +4449,15 @@ define void @v_shuffle_v4f32_v4f32__7_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4756,19 +4505,18 @@ define void @v_shuffle_v4f32_v4f32__7_u_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
-; GFX900-NEXT: s_waitcnt vmcnt(0)
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: v_shuffle_v4f32_v4f32__7_u_2_2:
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: v_shuffle_v4f32_v4f32__7_u_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -4811,17 +4559,14 @@ define void @v_shuffle_v4f32_v4f32__7_0_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v5, v6
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v6
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v2
-; GFX900-NEXT: v_mov_b32_e32 v6, v2
-; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4868,15 +4613,14 @@ define void @v_shuffle_v4f32_v4f32__7_1_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4923,16 +4667,15 @@ define void @v_shuffle_v4f32_v4f32__7_3_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v6
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4981,16 +4724,15 @@ define void @v_shuffle_v4f32_v4f32__7_4_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5037,15 +4779,14 @@ define void @v_shuffle_v4f32_v4f32__7_5_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v2
-; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
@@ -5095,16 +4836,15 @@ define void @v_shuffle_v4f32_v4f32__7_6_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5151,16 +4891,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5168,16 +4907,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5185,16 +4923,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -5209,16 +4946,14 @@ define void @v_shuffle_v4f32_v4f32__7_7_u_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5226,16 +4961,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_u_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5243,16 +4977,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_u_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -5267,17 +5000,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_0_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v6
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v0
-; GFX900-NEXT: v_mov_b32_e32 v6, v2
-; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v7
+; GFX900-NEXT: global_store_dwordx4 v9, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5287,15 +5018,13 @@ define void @v_shuffle_v4f32_v4f32__7_7_0_2(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v7
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: v_mov_b32_e32 v7, v2
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v6
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5305,15 +5034,13 @@ define void @v_shuffle_v4f32_v4f32__7_7_0_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, v7
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v2
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v6
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -5328,17 +5055,14 @@ define void @v_shuffle_v4f32_v4f32__7_7_1_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v6
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: v_mov_b32_e32 v6, v2
-; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5346,16 +5070,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_1_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[2:3] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[4:5], v[6:7] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5363,16 +5086,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_1_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[2:3] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[4:5], v[6:7] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -5393,11 +5115,9 @@ define void @v_shuffle_v4f32_v4f32__7_7_3_2(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v7
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: v_mov_b32_e32 v6, v3
-; GFX900-NEXT: v_mov_b32_e32 v7, v2
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5405,16 +5125,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_3_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[2:3] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[4:5], v[4:5] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5422,16 +5141,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_3_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[2:3] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[4:5], v[4:5] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -5446,54 +5164,49 @@ define void @v_shuffle_v4f32_v4f32__7_7_4_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v6
-; GFX900-NEXT: v_mov_b32_e32 v6, v3
-; GFX900-NEXT: v_mov_b32_e32 v7, v2
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v4f32__7_7_4_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, v7
-; GFX90A-NEXT: v_mov_b32_e32 v11, v7
-; GFX90A-NEXT: v_mov_b32_e32 v12, v4
-; GFX90A-NEXT: v_mov_b32_e32 v13, v2
-; GFX90A-NEXT: global_store_dwordx4 v8, v[10:13], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v6
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_4_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v7
-; GFX942-NEXT: v_mov_b32_e32 v11, v7
-; GFX942-NEXT: v_mov_b32_e32 v12, v4
-; GFX942-NEXT: v_mov_b32_e32 v13, v2
-; GFX942-NEXT: global_store_dwordx4 v8, v[10:13], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v6
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -5508,16 +5221,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_5_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v9, 0
-; GFX900-NEXT: v_mov_b32_e32 v5, v6
-; GFX900-NEXT: v_mov_b32_e32 v7, v4
-; GFX900-NEXT: v_mov_b32_e32 v8, v2
-; GFX900-NEXT: global_store_dwordx4 v9, v[5:8], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5525,16 +5237,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_5_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[2:3] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[0:1], v[4:5] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5542,17 +5253,16 @@ define void @v_shuffle_v4f32_v4f32__7_7_5_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[2:3] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[0:1], v[4:5] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -5567,16 +5277,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_6_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v6
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v6, v2
-; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5624,39 +5333,40 @@ define void @v_shuffle_v4f32_v4f32__u_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__u_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v4f32__u_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v4f32__u_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -5717,11 +5427,10 @@ define void @v_shuffle_v4f32_v4f32__1_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5731,11 +5440,11 @@ define void @v_shuffle_v4f32_v4f32__1_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5745,11 +5454,11 @@ define void @v_shuffle_v4f32_v4f32__1_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -5762,42 +5471,40 @@ define void @v_shuffle_v4f32_v4f32__2_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__2_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v4f32__2_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v4f32__2_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -5810,14 +5517,14 @@ define void @v_shuffle_v4f32_v4f32__3_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__3_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v3
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5827,11 +5534,11 @@ define void @v_shuffle_v4f32_v4f32__3_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v3
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5841,11 +5548,11 @@ define void @v_shuffle_v4f32_v4f32__3_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v3
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -5858,39 +5565,40 @@ define void @v_shuffle_v4f32_v4f32__4_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__4_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v4f32__4_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v4f32__4_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -5904,16 +5612,15 @@ define void @v_shuffle_v4f32_v4f32__5_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v4
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5923,14 +5630,14 @@ define void @v_shuffle_v4f32_v4f32__5_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v5
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v5
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5940,14 +5647,14 @@ define void @v_shuffle_v4f32_v4f32__5_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, v5
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v5
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -5962,16 +5669,15 @@ define void @v_shuffle_v4f32_v4f32__6_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5979,16 +5685,15 @@ define void @v_shuffle_v4f32_v4f32__6_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v5
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5996,16 +5701,15 @@ define void @v_shuffle_v4f32_v4f32__6_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
+; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -6019,17 +5723,16 @@ define void @v_shuffle_v4f32_v4f32__7_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6039,14 +5742,14 @@ define void @v_shuffle_v4f32_v4f32__7_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v7
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6056,14 +5759,14 @@ define void @v_shuffle_v4f32_v4f32__7_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v7
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -6077,16 +5780,15 @@ define void @v_shuffle_v4f32_v4f32__7_u_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_u_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6136,14 +5838,12 @@ define void @v_shuffle_v4f32_v4f32__7_0_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v5, v7
+; GFX900-NEXT: v_mov_b32_e32 v6, v7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v7
-; GFX900-NEXT: v_mov_b32_e32 v5, v0
-; GFX900-NEXT: v_mov_b32_e32 v6, v3
-; GFX900-NEXT: v_mov_b32_e32 v7, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6190,15 +5890,14 @@ define void @v_shuffle_v4f32_v4f32__7_1_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v5, v6
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6244,17 +5943,15 @@ define void @v_shuffle_v4f32_v4f32__7_2_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_2_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6302,16 +5999,15 @@ define void @v_shuffle_v4f32_v4f32__7_4_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6358,16 +6054,15 @@ define void @v_shuffle_v4f32_v4f32__7_5_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v7
-; GFX900-NEXT: v_mov_b32_e32 v6, v3
-; GFX900-NEXT: v_mov_b32_e32 v7, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6416,16 +6111,15 @@ define void @v_shuffle_v4f32_v4f32__7_6_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6472,16 +6166,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6489,16 +6182,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6506,16 +6198,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -6530,15 +6221,14 @@ define void @v_shuffle_v4f32_v4f32__7_7_u_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6546,15 +6236,14 @@ define void @v_shuffle_v4f32_v4f32__7_7_u_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6562,16 +6251,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_u_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -6586,17 +6274,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_0_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v7
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: v_mov_b32_e32 v6, v0
-; GFX900-NEXT: v_mov_b32_e32 v7, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v8
+; GFX900-NEXT: global_store_dwordx4 v9, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6606,15 +6292,13 @@ define void @v_shuffle_v4f32_v4f32__7_7_0_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v7
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6624,15 +6308,13 @@ define void @v_shuffle_v4f32_v4f32__7_7_0_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, v7
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -6653,11 +6335,9 @@ define void @v_shuffle_v4f32_v4f32__7_7_1_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v7
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: v_mov_b32_e32 v6, v1
-; GFX900-NEXT: v_mov_b32_e32 v7, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v7
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6667,15 +6347,14 @@ define void @v_shuffle_v4f32_v4f32__7_7_1_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v7
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: v_mov_b32_e32 v6, v1
-; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6685,15 +6364,14 @@ define void @v_shuffle_v4f32_v4f32__7_7_1_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, v7
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: v_mov_b32_e32 v6, v1
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -6708,15 +6386,14 @@ define void @v_shuffle_v4f32_v4f32__7_7_2_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6724,15 +6401,14 @@ define void @v_shuffle_v4f32_v4f32__7_7_2_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6740,16 +6416,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_2_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -6764,16 +6439,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_4_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6781,16 +6455,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_4_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6798,17 +6471,16 @@ define void @v_shuffle_v4f32_v4f32__7_7_4_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -6823,16 +6495,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_5_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6840,16 +6511,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_5_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6857,17 +6527,16 @@ define void @v_shuffle_v4f32_v4f32__7_7_5_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -6882,16 +6551,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_6_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v7
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: v_mov_b32_e32 v7, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6989,12 +6657,11 @@ define void @v_shuffle_v4f32_v4f32__1_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__1_4_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v4, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7031,36 +6698,33 @@ define void @v_shuffle_v4f32_v4f32__2_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__2_4_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v4f32__2_4_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v4f32__2_4_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -7073,12 +6737,11 @@ define void @v_shuffle_v4f32_v4f32__3_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__3_4_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v4, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7141,13 +6804,12 @@ define void @v_shuffle_v4f32_v4f32__5_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7155,13 +6817,12 @@ define void @v_shuffle_v4f32_v4f32__5_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -7178,43 +6839,39 @@ define void @v_shuffle_v4f32_v4f32__6_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: v_mov_b32_e32 v3, v0
; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v4f32__6_4_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v0
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v0
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v4f32__6_4_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v9, v0
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v0
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -7228,15 +6885,14 @@ define void @v_shuffle_v4f32_v4f32__7_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_4_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7244,13 +6900,12 @@ define void @v_shuffle_v4f32_v4f32__7_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[2:3] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7258,13 +6913,12 @@ define void @v_shuffle_v4f32_v4f32__7_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[2:3] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -7278,43 +6932,39 @@ define void @v_shuffle_v4f32_v4f32__7_u_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_u_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: global_store_dwordx4 v4, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v4f32__7_u_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v3
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v0
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v1, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v1, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_u_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v3
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v9, v0
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v1, 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v1, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -7329,17 +6979,15 @@ define void @v_shuffle_v4f32_v4f32__7_0_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: v_mov_b32_e32 v4, v1
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7352,11 +7000,10 @@ define void @v_shuffle_v4f32_v4f32__7_0_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[4:5], v[0:1] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v2
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[0:1] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7366,15 +7013,14 @@ define void @v_shuffle_v4f32_v4f32__7_0_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[4:5], v[0:1] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v2
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[0:1] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -7389,15 +7035,15 @@ define void @v_shuffle_v4f32_v4f32__7_1_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7407,14 +7053,13 @@ define void @v_shuffle_v4f32_v4f32__7_1_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7424,15 +7069,14 @@ define void @v_shuffle_v4f32_v4f32__7_1_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -7447,16 +7091,15 @@ define void @v_shuffle_v4f32_v4f32__7_2_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7464,15 +7107,14 @@ define void @v_shuffle_v4f32_v4f32__7_2_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[2:3] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[6:7] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
@@ -7481,16 +7123,15 @@ define void @v_shuffle_v4f32_v4f32__7_2_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[2:3] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[6:7] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -7506,17 +7147,15 @@ define void @v_shuffle_v4f32_v4f32__7_3_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7526,15 +7165,13 @@ define void @v_shuffle_v4f32_v4f32__7_3_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v7
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7544,15 +7181,13 @@ define void @v_shuffle_v4f32_v4f32__7_3_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v7
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -7566,46 +7201,42 @@ define void @v_shuffle_v4f32_v4f32__7_5_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_5_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
; GFX900-NEXT: v_mov_b32_e32 v5, v0
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v4f32__7_5_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v3
-; GFX90A-NEXT: v_mov_b32_e32 v7, v1
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v0
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: v_mov_b32_e32 v1, v3
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_5_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v3
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v9, v0
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -7619,14 +7250,14 @@ define void @v_shuffle_v4f32_v4f32__7_6_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_6_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7634,13 +7265,12 @@ define void @v_shuffle_v4f32_v4f32__7_6_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[2:3] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[4:5] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7648,13 +7278,12 @@ define void @v_shuffle_v4f32_v4f32__7_6_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[2:3] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[4:5] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -7668,46 +7297,42 @@ define void @v_shuffle_v4f32_v4f32__7_7_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_7_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v4f32__7_7_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v3
-; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v0
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v0
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v3
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v9, v0
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v0
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -7721,14 +7346,13 @@ define void @v_shuffle_v4f32_v4f32__7_7_u_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_7_u_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: global_store_dwordx4 v5, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7769,17 +7393,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_0_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: global_store_dwordx4 v9, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7787,17 +7409,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_0_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, v5
-; GFX90A-NEXT: v_mov_b32_e32 v9, v5
-; GFX90A-NEXT: v_mov_b32_e32 v10, v0
-; GFX90A-NEXT: v_mov_b32_e32 v11, v2
-; GFX90A-NEXT: global_store_dwordx4 v6, v[8:11], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v0
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7805,17 +7425,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_0_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, v0
-; GFX942-NEXT: v_mov_b32_e32 v8, v5
-; GFX942-NEXT: v_mov_b32_e32 v9, v5
-; GFX942-NEXT: v_mov_b32_e32 v11, v2
-; GFX942-NEXT: global_store_dwordx4 v6, v[8:11], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v0
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -7830,17 +7448,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_1_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v5
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: v_mov_b32_e32 v6, v2
-; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7848,16 +7464,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_1_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[2:3] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v0, v5
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[4:5], v[0:1] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7865,17 +7480,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_1_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[2:3] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v0, v5
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[4:5], v[0:1] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -7890,15 +7503,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_2_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7906,16 +7519,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_2_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v0
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7923,17 +7535,16 @@ define void @v_shuffle_v4f32_v4f32__7_7_2_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v0
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -7948,17 +7559,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_3_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7966,16 +7575,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_3_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[4:5] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[4:5], v[0:1] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7983,17 +7591,16 @@ define void @v_shuffle_v4f32_v4f32__7_7_3_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[4:5] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[4:5], v[0:1] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -8007,14 +7614,14 @@ define void @v_shuffle_v4f32_v4f32__7_7_5_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_7_5_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v4, v1
-; GFX900-NEXT: v_mov_b32_e32 v5, v0
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -8054,15 +7661,14 @@ define void @v_shuffle_v4f32_v4f32__7_7_6_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_7_6_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v4, v3
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: v_mov_b32_e32 v6, v2
-; GFX900-NEXT: v_mov_b32_e32 v7, v0
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -8157,12 +7763,12 @@ define void @v_shuffle_v4f32_v4f32__0_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v1, v2
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -8216,10 +7822,9 @@ define void @v_shuffle_v4f32_v4f32__1_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: v_mov_b32_e32 v2, v3
; GFX900-NEXT: v_mov_b32_e32 v4, v3
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -8275,10 +7880,9 @@ define void @v_shuffle_v4f32_v4f32__2_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
; GFX900-NEXT: v_mov_b32_e32 v5, v4
-; GFX900-NEXT: v_mov_b32_e32 v6, v4
-; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -8286,16 +7890,15 @@ define void @v_shuffle_v4f32_v4f32__2_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v6, v5
-; GFX90A-NEXT: v_mov_b32_e32 v7, v5
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v5
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -8303,16 +7906,15 @@ define void @v_shuffle_v4f32_v4f32__2_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v5
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -8330,13 +7932,12 @@ define void @v_shuffle_v4f32_v4f32__3_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v5
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v3
-; GFX900-NEXT: v_mov_b32_e32 v6, v5
-; GFX900-NEXT: v_mov_b32_e32 v7, v5
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -8433,11 +8034,11 @@ define void @v_shuffle_v4f32_v4f32__5_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v1
; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -8482,11 +8083,11 @@ define void @v_shuffle_v4f32_v4f32__6_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -8496,11 +8097,11 @@ define void @v_shuffle_v4f32_v4f32__6_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -8510,11 +8111,11 @@ define void @v_shuffle_v4f32_v4f32__6_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -8528,14 +8129,14 @@ define void @v_shuffle_v4f32_v4f32__7_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_5_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: v_mov_b32_e32 v6, v1
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -8577,14 +8178,13 @@ define void @v_shuffle_v4f32_v4f32__7_u_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_u_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: v_mov_b32_e32 v6, v1
+; GFX900-NEXT: global_store_dwordx4 v4, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -8592,13 +8192,12 @@ define void @v_shuffle_v4f32_v4f32__7_u_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v1, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx4 v1, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -8606,13 +8205,12 @@ define void @v_shuffle_v4f32_v4f32__7_u_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v3
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v1, 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx4 v1, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -8627,17 +8225,15 @@ define void @v_shuffle_v4f32_v4f32__7_0_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v2
-; GFX900-NEXT: v_mov_b32_e32 v6, v2
-; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: v_mov_b32_e32 v6, v1
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -8685,15 +8281,15 @@ define void @v_shuffle_v4f32_v4f32__7_1_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: v_mov_b32_e32 v6, v1
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -8741,17 +8337,15 @@ define void @v_shuffle_v4f32_v4f32__7_2_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: v_mov_b32_e32 v6, v1
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -8759,15 +8353,14 @@ define void @v_shuffle_v4f32_v4f32__7_2_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[2:3] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[6:7] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
@@ -8776,16 +8369,15 @@ define void @v_shuffle_v4f32_v4f32__7_2_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[2:3] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[6:7] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -8801,17 +8393,15 @@ define void @v_shuffle_v4f32_v4f32__7_3_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: v_mov_b32_e32 v6, v1
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -8821,15 +8411,13 @@ define void @v_shuffle_v4f32_v4f32__7_3_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v7
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -8839,15 +8427,13 @@ define void @v_shuffle_v4f32_v4f32__7_3_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v7
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -8861,15 +8447,14 @@ define void @v_shuffle_v4f32_v4f32__7_4_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_4_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v1
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -8877,13 +8462,12 @@ define void @v_shuffle_v4f32_v4f32__7_4_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v4, v1
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[2:3] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -8891,13 +8475,12 @@ define void @v_shuffle_v4f32_v4f32__7_4_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v4, v1
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[2:3] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -8926,13 +8509,12 @@ define void @v_shuffle_v4f32_v4f32__7_6_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[2:3] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v4, v1
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[4:5] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -8940,13 +8522,12 @@ define void @v_shuffle_v4f32_v4f32__7_6_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[2:3] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v4, v1
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[4:5] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -8960,45 +8541,42 @@ define void @v_shuffle_v4f32_v4f32__7_7_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_7_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v1
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v4f32__7_7_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v3
-; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: v_mov_b32_e32 v8, v1
-; GFX90A-NEXT: v_mov_b32_e32 v9, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v3
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: v_mov_b32_e32 v8, v1
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -9012,13 +8590,13 @@ define void @v_shuffle_v4f32_v4f32__7_7_u_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_7_u_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v1
+; GFX900-NEXT: global_store_dwordx4 v5, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -9059,16 +8637,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_0_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: v_mov_b32_e32 v5, v0
-; GFX900-NEXT: v_mov_b32_e32 v6, v2
-; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v1
+; GFX900-NEXT: global_store_dwordx4 v9, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -9076,17 +8653,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_0_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, v5
-; GFX90A-NEXT: v_mov_b32_e32 v9, v5
-; GFX90A-NEXT: v_mov_b32_e32 v10, v0
-; GFX90A-NEXT: v_mov_b32_e32 v11, v3
-; GFX90A-NEXT: global_store_dwordx4 v6, v[8:11], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -9094,17 +8669,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_0_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, v0
-; GFX942-NEXT: v_mov_b32_e32 v8, v5
-; GFX942-NEXT: v_mov_b32_e32 v9, v5
-; GFX942-NEXT: v_mov_b32_e32 v11, v3
-; GFX942-NEXT: global_store_dwordx4 v6, v[8:11], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -9119,16 +8692,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_1_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v5
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
; GFX900-NEXT: v_mov_b32_e32 v6, v1
-; GFX900-NEXT: v_mov_b32_e32 v7, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -9136,17 +8708,16 @@ define void @v_shuffle_v4f32_v4f32__7_7_1_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, v5
-; GFX90A-NEXT: v_mov_b32_e32 v9, v5
-; GFX90A-NEXT: v_mov_b32_e32 v10, v1
-; GFX90A-NEXT: v_mov_b32_e32 v11, v3
-; GFX90A-NEXT: global_store_dwordx4 v6, v[8:11], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -9154,17 +8725,16 @@ define void @v_shuffle_v4f32_v4f32__7_7_1_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, v1
-; GFX942-NEXT: v_mov_b32_e32 v8, v5
-; GFX942-NEXT: v_mov_b32_e32 v9, v5
-; GFX942-NEXT: v_mov_b32_e32 v11, v3
-; GFX942-NEXT: global_store_dwordx4 v6, v[8:11], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -9179,16 +8749,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_2_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v1
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -9196,16 +8765,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_2_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -9213,17 +8781,16 @@ define void @v_shuffle_v4f32_v4f32__7_7_2_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -9238,17 +8805,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_3_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v1
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -9256,17 +8821,16 @@ define void @v_shuffle_v4f32_v4f32__7_7_3_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -9274,17 +8838,16 @@ define void @v_shuffle_v4f32_v4f32__7_7_3_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -9298,45 +8861,42 @@ define void @v_shuffle_v4f32_v4f32__7_7_4_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_7_4_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: v_mov_b32_e32 v6, v1
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v4f32__7_7_4_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v3
-; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_4_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v3
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -9350,46 +8910,42 @@ define void @v_shuffle_v4f32_v4f32__7_7_6_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_7_6_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v4, v3
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: v_mov_b32_e32 v6, v2
-; GFX900-NEXT: v_mov_b32_e32 v7, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: v_mov_b32_e32 v6, v1
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v4f32__7_7_6_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v3
-; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: v_mov_b32_e32 v8, v2
-; GFX90A-NEXT: v_mov_b32_e32 v9, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: v_mov_b32_e32 v1, v5
+; GFX90A-NEXT: v_mov_b32_e32 v2, v4
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_6_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v3
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: v_mov_b32_e32 v8, v2
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: v_mov_b32_e32 v1, v5
+; GFX942-NEXT: v_mov_b32_e32 v2, v4
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -9406,10 +8962,10 @@ define void @v_shuffle_v4f32_v4f32__u_6_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -9514,10 +9070,9 @@ define void @v_shuffle_v4f32_v4f32__1_6_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: v_mov_b32_e32 v2, v4
; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: v_mov_b32_e32 v5, v4
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -9571,10 +9126,9 @@ define void @v_shuffle_v4f32_v4f32__2_6_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v3, v5
; GFX900-NEXT: v_mov_b32_e32 v4, v5
-; GFX900-NEXT: v_mov_b32_e32 v6, v5
-; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -9582,16 +9136,16 @@ define void @v_shuffle_v4f32_v4f32__2_6_6_6(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v6
+; GFX90A-NEXT: v_mov_b32_e32 v4, v6
; GFX90A-NEXT: v_mov_b32_e32 v5, v6
-; GFX90A-NEXT: v_mov_b32_e32 v7, v6
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -9599,16 +9153,16 @@ define void @v_shuffle_v4f32_v4f32__2_6_6_6(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v6
+; GFX942-NEXT: v_mov_b32_e32 v4, v6
; GFX942-NEXT: v_mov_b32_e32 v5, v6
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v6
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -9626,13 +9180,12 @@ define void @v_shuffle_v4f32_v4f32__3_6_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v6
+; GFX900-NEXT: v_mov_b32_e32 v5, v6
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v3
-; GFX900-NEXT: v_mov_b32_e32 v5, v6
-; GFX900-NEXT: v_mov_b32_e32 v7, v6
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -9728,11 +9281,10 @@ define void @v_shuffle_v4f32_v4f32__5_6_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -9775,11 +9327,11 @@ define void @v_shuffle_v4f32_v4f32__6_6_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -9789,11 +9341,11 @@ define void @v_shuffle_v4f32_v4f32__6_6_6_6(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -9803,11 +9355,11 @@ define void @v_shuffle_v4f32_v4f32__6_6_6_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -9821,14 +9373,14 @@ define void @v_shuffle_v4f32_v4f32__7_6_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_6_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -9868,13 +9420,13 @@ define void @v_shuffle_v4f32_v4f32__7_u_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_u_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: global_store_dwordx4 v4, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -9915,16 +9467,15 @@ define void @v_shuffle_v4f32_v4f32__7_0_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v4, v3
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -9972,16 +9523,15 @@ define void @v_shuffle_v4f32_v4f32__7_1_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -10031,15 +9581,14 @@ define void @v_shuffle_v4f32_v4f32__7_2_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v6
-; GFX900-NEXT: v_mov_b32_e32 v4, v2
-; GFX900-NEXT: v_mov_b32_e32 v6, v5
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
@@ -10088,16 +9637,15 @@ define void @v_shuffle_v4f32_v4f32__7_3_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v7
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: v_mov_b32_e32 v7, v6
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -10105,16 +9653,16 @@ define void @v_shuffle_v4f32_v4f32__7_3_6_6(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v7
-; GFX90A-NEXT: v_mov_b32_e32 v5, v3
-; GFX90A-NEXT: v_mov_b32_e32 v7, v6
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v7
+; GFX90A-NEXT: v_mov_b32_e32 v4, v6
+; GFX90A-NEXT: v_mov_b32_e32 v5, v6
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -10122,16 +9670,16 @@ define void @v_shuffle_v4f32_v4f32__7_3_6_6(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, v7
-; GFX942-NEXT: v_mov_b32_e32 v5, v3
-; GFX942-NEXT: v_mov_b32_e32 v7, v6
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v7
+; GFX942-NEXT: v_mov_b32_e32 v4, v6
+; GFX942-NEXT: v_mov_b32_e32 v5, v6
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -10192,13 +9740,14 @@ define void @v_shuffle_v4f32_v4f32__7_5_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_5_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -10238,14 +9787,14 @@ define void @v_shuffle_v4f32_v4f32__7_7_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_7_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -10287,14 +9836,13 @@ define void @v_shuffle_v4f32_v4f32__7_7_u_6(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_7_u_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: global_store_dwordx4 v5, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -10337,17 +9885,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_0_6(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, v4
-; GFX900-NEXT: v_mov_b32_e32 v6, v4
-; GFX900-NEXT: v_mov_b32_e32 v7, v0
-; GFX900-NEXT: v_mov_b32_e32 v8, v3
-; GFX900-NEXT: global_store_dwordx4 v9, v[5:8], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: global_store_dwordx4 v9, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -10355,17 +9901,16 @@ define void @v_shuffle_v4f32_v4f32__7_7_0_6(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, v5
-; GFX90A-NEXT: v_mov_b32_e32 v9, v5
-; GFX90A-NEXT: v_mov_b32_e32 v10, v0
-; GFX90A-NEXT: v_mov_b32_e32 v11, v4
-; GFX90A-NEXT: global_store_dwordx4 v6, v[8:11], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v7
+; GFX90A-NEXT: v_mov_b32_e32 v1, v7
+; GFX90A-NEXT: v_mov_b32_e32 v3, v6
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -10373,17 +9918,17 @@ define void @v_shuffle_v4f32_v4f32__7_7_0_6(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, v0
-; GFX942-NEXT: v_mov_b32_e32 v8, v5
-; GFX942-NEXT: v_mov_b32_e32 v9, v5
-; GFX942-NEXT: v_mov_b32_e32 v11, v4
-; GFX942-NEXT: global_store_dwordx4 v6, v[8:11], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v7
+; GFX942-NEXT: v_mov_b32_e32 v1, v7
+; GFX942-NEXT: v_mov_b32_e32 v3, v6
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -10398,17 +9943,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_1_6(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, v5
-; GFX900-NEXT: v_mov_b32_e32 v7, v5
-; GFX900-NEXT: v_mov_b32_e32 v8, v1
-; GFX900-NEXT: v_mov_b32_e32 v9, v4
-; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -10416,16 +9959,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_1_6(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[4:5] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v0, v5
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[4:5], v[2:3] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -10433,17 +9975,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_1_6(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[4:5] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v0, v5
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[4:5], v[2:3] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -10458,16 +9998,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_2_6(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -10517,17 +10056,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_3_6(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v6
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -10535,16 +10072,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_3_6(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[6:7] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[4:5], v[2:3] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -10552,17 +10088,16 @@ define void @v_shuffle_v4f32_v4f32__7_7_3_6(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[6:7] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[4:5], v[2:3] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -10576,46 +10111,42 @@ define void @v_shuffle_v4f32_v4f32__7_7_4_6(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_7_4_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v4, v3
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: v_mov_b32_e32 v6, v0
-; GFX900-NEXT: v_mov_b32_e32 v7, v2
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v4f32__7_7_4_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v3
-; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v2
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: v_mov_b32_e32 v1, v5
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_4_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v3
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v9, v2
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: v_mov_b32_e32 v1, v5
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -10629,15 +10160,14 @@ define void @v_shuffle_v4f32_v4f32__7_7_5_6(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_7_5_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v4, v3
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: v_mov_b32_e32 v6, v1
-; GFX900-NEXT: v_mov_b32_e32 v7, v2
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -10677,39 +10207,40 @@ define void @v_shuffle_v4f32_v4f32__u_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__u_7_7_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v4f32__u_7_7_7:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v4f32__u_7_7_7:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -10789,10 +10320,10 @@ define void @v_shuffle_v4f32_v4f32__1_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: v_mov_b32_e32 v2, v5
; GFX900-NEXT: v_mov_b32_e32 v3, v5
; GFX900-NEXT: v_mov_b32_e32 v4, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -10800,16 +10331,16 @@ define void @v_shuffle_v4f32_v4f32__1_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v5
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -10817,17 +10348,16 @@ define void @v_shuffle_v4f32_v4f32__1_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v5
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -10848,10 +10378,10 @@ define void @v_shuffle_v4f32_v4f32__2_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v3, v6
; GFX900-NEXT: v_mov_b32_e32 v4, v6
; GFX900-NEXT: v_mov_b32_e32 v5, v6
-; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -10859,16 +10389,16 @@ define void @v_shuffle_v4f32_v4f32__2_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v7
+; GFX90A-NEXT: v_mov_b32_e32 v4, v7
; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: v_mov_b32_e32 v6, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -10876,16 +10406,16 @@ define void @v_shuffle_v4f32_v4f32__2_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v7
+; GFX942-NEXT: v_mov_b32_e32 v4, v7
; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v6, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -10903,13 +10433,13 @@ define void @v_shuffle_v4f32_v4f32__3_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v7
+; GFX900-NEXT: v_mov_b32_e32 v5, v7
+; GFX900-NEXT: v_mov_b32_e32 v6, v7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v3
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: v_mov_b32_e32 v6, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -10917,16 +10447,16 @@ define void @v_shuffle_v4f32_v4f32__3_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v5
; GFX90A-NEXT: v_mov_b32_e32 v4, v3
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: v_mov_b32_e32 v6, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -10934,16 +10464,17 @@ define void @v_shuffle_v4f32_v4f32__3_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v5
; GFX942-NEXT: v_mov_b32_e32 v4, v3
-; GFX942-NEXT: v_mov_b32_e32 v6, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -11006,11 +10537,10 @@ define void @v_shuffle_v4f32_v4f32__5_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -11020,11 +10550,11 @@ define void @v_shuffle_v4f32_v4f32__5_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -11034,11 +10564,11 @@ define void @v_shuffle_v4f32_v4f32__5_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -11052,42 +10582,40 @@ define void @v_shuffle_v4f32_v4f32__6_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__6_7_7_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v4f32__6_7_7_7:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v4f32__6_7_7_7:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -11101,13 +10629,13 @@ define void @v_shuffle_v4f32_v4f32__7_u_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_u_7_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v3
+; GFX900-NEXT: global_store_dwordx4 v4, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -11148,16 +10676,15 @@ define void @v_shuffle_v4f32_v4f32__7_0_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -11205,16 +10732,15 @@ define void @v_shuffle_v4f32_v4f32__7_1_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v3
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -11264,15 +10790,14 @@ define void @v_shuffle_v4f32_v4f32__7_2_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v6
-; GFX900-NEXT: v_mov_b32_e32 v4, v2
-; GFX900-NEXT: v_mov_b32_e32 v5, v6
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v3
; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
@@ -11321,16 +10846,15 @@ define void @v_shuffle_v4f32_v4f32__7_3_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v7
; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: v_mov_b32_e32 v6, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v3
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -11338,16 +10862,16 @@ define void @v_shuffle_v4f32_v4f32__7_3_7_7(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v7
; GFX90A-NEXT: v_mov_b32_e32 v4, v7
-; GFX90A-NEXT: v_mov_b32_e32 v5, v3
-; GFX90A-NEXT: v_mov_b32_e32 v6, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v5, v7
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -11355,16 +10879,16 @@ define void @v_shuffle_v4f32_v4f32__7_3_7_7(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v7
; GFX942-NEXT: v_mov_b32_e32 v4, v7
-; GFX942-NEXT: v_mov_b32_e32 v5, v3
-; GFX942-NEXT: v_mov_b32_e32 v6, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, v7
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -11378,15 +10902,14 @@ define void @v_shuffle_v4f32_v4f32__7_4_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_4_7_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v3
-; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
; GFX900-NEXT: v_mov_b32_e32 v6, v3
-; GFX900-NEXT: v_mov_b32_e32 v7, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -11426,13 +10949,14 @@ define void @v_shuffle_v4f32_v4f32__7_5_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_5_7_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v3
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -11472,14 +10996,14 @@ define void @v_shuffle_v4f32_v4f32__7_6_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_6_7_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v3
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -11519,13 +11043,13 @@ define void @v_shuffle_v4f32_v4f32__7_7_u_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_7_u_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v3
+; GFX900-NEXT: global_store_dwordx4 v5, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -11536,9 +11060,9 @@ define void @v_shuffle_v4f32_v4f32__7_7_u_7(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v3
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -11549,9 +11073,9 @@ define void @v_shuffle_v4f32_v4f32__7_7_u_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v3
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -11566,16 +11090,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_0_7(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v3
+; GFX900-NEXT: global_store_dwordx4 v9, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -11583,16 +11106,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_0_7(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -11600,17 +11122,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_0_7(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -11625,16 +11145,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_1_7(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -11642,16 +11161,16 @@ define void @v_shuffle_v4f32_v4f32__7_7_1_7(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -11659,17 +11178,16 @@ define void @v_shuffle_v4f32_v4f32__7_7_1_7(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -11684,16 +11202,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_2_7(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v6
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v3
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -11701,16 +11218,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_2_7(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -11718,17 +11234,16 @@ define void @v_shuffle_v4f32_v4f32__7_7_2_7(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -11743,16 +11258,15 @@ define void @v_shuffle_v4f32_v4f32__7_7_3_7(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v7
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
; GFX900-NEXT: v_mov_b32_e32 v6, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -11760,16 +11274,16 @@ define void @v_shuffle_v4f32_v4f32__7_7_3_7(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v7
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: v_mov_b32_e32 v6, v3
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -11777,16 +11291,16 @@ define void @v_shuffle_v4f32_v4f32__7_7_3_7(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, v7
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: v_mov_b32_e32 v6, v3
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -11800,46 +11314,42 @@ define void @v_shuffle_v4f32_v4f32__7_7_4_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_7_4_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v4, v3
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: v_mov_b32_e32 v6, v0
-; GFX900-NEXT: v_mov_b32_e32 v7, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: v_mov_b32_e32 v6, v3
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v4f32__7_7_4_7:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v3
-; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_4_7:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v3
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v9, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -11853,46 +11363,42 @@ define void @v_shuffle_v4f32_v4f32__7_7_5_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_7_5_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v4, v3
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: v_mov_b32_e32 v6, v1
-; GFX900-NEXT: v_mov_b32_e32 v7, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: v_mov_b32_e32 v6, v3
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4f32_v4f32__7_7_5_7:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v3
-; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: v_mov_b32_e32 v8, v1
-; GFX90A-NEXT: v_mov_b32_e32 v9, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4f32_v4f32__7_7_5_7:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v3
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: v_mov_b32_e32 v8, v1
-; GFX942-NEXT: v_mov_b32_e32 v9, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x float> asm "; def $0", "=v"()
@@ -11906,13 +11412,14 @@ define void @v_shuffle_v4f32_v4f32__7_7_6_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4f32_v4f32__7_7_6_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: v_mov_b32_e32 v6, v3
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4i32.v2i32.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4i32.v2i32.ll
index 03503c9dac197..ad2dd3a8f8073 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4i32.v2i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4i32.v2i32.ll
@@ -58,12 +58,11 @@ define void @v_shuffle_v4i32_v2i32__1_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v2i32__1_u_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v2, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx4 v2, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v2, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -111,12 +110,11 @@ define void @v_shuffle_v4i32_v2i32__3_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v2i32__3_u_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v2, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx4 v2, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v2, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -154,15 +152,14 @@ define void @v_shuffle_v4i32_v2i32__3_0_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v2i32__3_0_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:2]
-; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[2:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:1]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx4 v4, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -207,15 +204,14 @@ define void @v_shuffle_v4i32_v2i32__3_1_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v2i32__3_1_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:1]
+; GFX900-NEXT: ; def v[1:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v3, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -262,10 +258,10 @@ define void @v_shuffle_v4i32_v2i32__3_2_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:2]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: global_store_dwordx4 v3, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -306,12 +302,12 @@ define void @v_shuffle_v4i32_v2i32__3_3_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v2i32__3_3_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx4 v2, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: global_store_dwordx4 v3, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -349,15 +345,15 @@ define void @v_shuffle_v4i32_v2i32__3_3_0_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v2i32__3_3_0_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[3:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -402,14 +398,14 @@ define void @v_shuffle_v4i32_v2i32__3_3_1_u(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:2]
+; GFX900-NEXT: ; def v[2:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: global_store_dwordx4 v4, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -457,11 +453,11 @@ define void @v_shuffle_v4i32_v2i32__3_3_2_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: global_store_dwordx4 v4, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -502,13 +498,13 @@ define void @v_shuffle_v4i32_v2i32__3_3_3_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v2i32__3_3_3_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: global_store_dwordx4 v4, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -548,16 +544,16 @@ define void @v_shuffle_v4i32_v2i32__3_3_3_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v2i32__3_3_3_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:4]
+; GFX900-NEXT: ; def v[4:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -604,15 +600,15 @@ define void @v_shuffle_v4i32_v2i32__3_3_3_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[3:4]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -660,12 +656,12 @@ define void @v_shuffle_v4i32_v2i32__3_3_3_2(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:4]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -705,14 +701,14 @@ define void @v_shuffle_v4i32_v2i32__3_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v2i32__3_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
; GFX900-NEXT: v_mov_b32_e32 v2, v1
; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -850,14 +846,14 @@ define void @v_shuffle_v4i32_v2i32__1_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v2i32__1_0_0_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:2]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -945,16 +941,15 @@ define void @v_shuffle_v4i32_v2i32__3_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:2]
+; GFX900-NEXT: ; def v[2:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1001,15 +996,14 @@ define void @v_shuffle_v4i32_v2i32__3_u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[3:4]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v2, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v2, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1056,16 +1050,15 @@ define void @v_shuffle_v4i32_v2i32__3_1_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[3:4]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v4
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1114,15 +1107,15 @@ define void @v_shuffle_v4i32_v2i32__3_2_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:2]
+; GFX900-NEXT: ; def v[3:4]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:4]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1169,15 +1162,15 @@ define void @v_shuffle_v4i32_v2i32__3_3_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[3:4]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1223,16 +1216,15 @@ define void @v_shuffle_v4i32_v2i32__3_3_u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v2i32__3_3_u_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:1]
+; GFX900-NEXT: ; def v[4:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v3, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1278,16 +1270,16 @@ define void @v_shuffle_v4i32_v2i32__3_3_1_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v2i32__3_3_1_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:1]
+; GFX900-NEXT: ; def v[4:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:4]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: v_mov_b32_e32 v3, v5
+; GFX900-NEXT: global_store_dwordx4 v6, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1333,17 +1325,16 @@ define void @v_shuffle_v4i32_v2i32__3_3_2_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v2i32__3_3_2_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[4:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1483,14 +1474,14 @@ define void @v_shuffle_v4i32_v2i32__1_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v2i32__1_1_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
; GFX900-NEXT: v_mov_b32_e32 v2, v1
; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1577,17 +1568,16 @@ define void @v_shuffle_v4i32_v2i32__3_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v2i32__3_1_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:1]
+; GFX900-NEXT: ; def v[1:2]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1635,16 +1625,15 @@ define void @v_shuffle_v4i32_v2i32__3_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v2i32__3_u_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:2]
+; GFX900-NEXT: ; def v[2:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1690,16 +1679,15 @@ define void @v_shuffle_v4i32_v2i32__3_0_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v2i32__3_0_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:4]
+; GFX900-NEXT: ; def v[2:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:2]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1748,13 +1736,13 @@ define void @v_shuffle_v4i32_v2i32__3_2_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[2:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:2]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1801,15 +1789,15 @@ define void @v_shuffle_v4i32_v2i32__3_3_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:2]
+; GFX900-NEXT: ; def v[2:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1855,15 +1843,15 @@ define void @v_shuffle_v4i32_v2i32__3_3_u_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v2i32__3_3_u_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[3:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1907,15 +1895,15 @@ define void @v_shuffle_v4i32_v2i32__3_3_0_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v2i32__3_3_0_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[3:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1962,14 +1950,13 @@ define void @v_shuffle_v4i32_v2i32__3_3_2_1(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:4]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2067,12 +2054,11 @@ define void @v_shuffle_v4i32_v2i32__1_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v2i32__1_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v2, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx4 v2, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v2, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2120,14 +2106,14 @@ define void @v_shuffle_v4i32_v2i32__3_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v2i32__3_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:2]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2167,13 +2153,13 @@ define void @v_shuffle_v4i32_v2i32__3_u_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v2i32__3_u_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v2, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v1, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v1, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx4 v2, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2214,15 +2200,15 @@ define void @v_shuffle_v4i32_v2i32__3_0_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:2]
+; GFX900-NEXT: ; def v[2:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2269,15 +2255,15 @@ define void @v_shuffle_v4i32_v2i32__3_1_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:1]
+; GFX900-NEXT: ; def v[1:2]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2323,14 +2309,14 @@ define void @v_shuffle_v4i32_v2i32__3_3_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v2i32__3_3_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2372,14 +2358,13 @@ define void @v_shuffle_v4i32_v2i32__3_3_u_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v2i32__3_3_u_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx4 v3, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2423,15 +2408,15 @@ define void @v_shuffle_v4i32_v2i32__3_3_0_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[3:4]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:4]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2481,15 +2466,15 @@ define void @v_shuffle_v4i32_v2i32__3_3_1_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:2]
+; GFX900-NEXT: ; def v[2:3]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:4]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2638,16 +2623,15 @@ define void @v_shuffle_v4i32_v2i32__1_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:2]
+; GFX900-NEXT: ; def v[2:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v3
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2742,13 +2726,13 @@ define void @v_shuffle_v4i32_v2i32__3_u_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v2i32__3_u_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:2]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx4 v2, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2789,15 +2773,15 @@ define void @v_shuffle_v4i32_v2i32__3_0_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:2]
+; GFX900-NEXT: ; def v[2:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2844,15 +2828,15 @@ define void @v_shuffle_v4i32_v2i32__3_1_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:1]
+; GFX900-NEXT: ; def v[1:2]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2898,13 +2882,14 @@ define void @v_shuffle_v4i32_v2i32__3_2_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v2i32__3_2_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:2]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2944,13 +2929,13 @@ define void @v_shuffle_v4i32_v2i32__3_3_u_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v2i32__3_3_u_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v2, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx4 v3, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2991,15 +2976,15 @@ define void @v_shuffle_v4i32_v2i32__3_3_0_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[3:4]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3046,15 +3031,15 @@ define void @v_shuffle_v4i32_v2i32__3_3_1_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:2]
+; GFX900-NEXT: ; def v[2:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3102,13 +3087,14 @@ define void @v_shuffle_v4i32_v2i32__3_3_2_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v2i32__3_3_2_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4i32.v3i32.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4i32.v3i32.ll
index fc6d2a84d4892..2cf0f5c030d74 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4i32.v3i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4i32.v3i32.ll
@@ -58,12 +58,11 @@ define void @v_shuffle_v4i32_v3i32__1_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v3i32__1_u_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v3, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -100,36 +99,33 @@ define void @v_shuffle_v4i32_v3i32__2_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v3i32__2_u_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v3, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v3i32__2_u_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v3, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v3i32__2_u_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v3, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -153,12 +149,11 @@ define void @v_shuffle_v4i32_v3i32__4_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v3i32__4_u_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v3, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -196,36 +191,33 @@ define void @v_shuffle_v4i32_v3i32__5_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_u_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v3, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_u_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v3, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_u_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v3, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -239,48 +231,45 @@ define void @v_shuffle_v4i32_v3i32__5_0_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_0_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_0_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v0
-; GFX90A-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_0_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v0
-; GFX942-NEXT: global_store_dwordx4 v5, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -294,46 +283,43 @@ define void @v_shuffle_v4i32_v3i32__5_1_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_1_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
-; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_1_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
-; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_1_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
-; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx4 v5, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -347,16 +333,14 @@ define void @v_shuffle_v4i32_v3i32__5_2_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_2_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[1:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -364,15 +348,14 @@ define void @v_shuffle_v4i32_v3i32__5_2_u_u(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -380,15 +363,14 @@ define void @v_shuffle_v4i32_v3i32__5_2_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v5, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -404,37 +386,35 @@ define void @v_shuffle_v4i32_v3i32__5_3_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_3_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_3_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -448,36 +428,37 @@ define void @v_shuffle_v4i32_v3i32__5_4_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_4_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_4_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_4_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -491,39 +472,37 @@ define void @v_shuffle_v4i32_v3i32__5_5_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -537,51 +516,46 @@ define void @v_shuffle_v4i32_v3i32__5_5_0_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_0_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_0_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_0_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: global_store_dwordx4 v5, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -596,15 +570,14 @@ define void @v_shuffle_v4i32_v3i32__5_5_1_u(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -612,16 +585,15 @@ define void @v_shuffle_v4i32_v3i32__5_5_1_u(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: v_mov_b32_e32 v4, v1
-; GFX90A-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -629,17 +601,15 @@ define void @v_shuffle_v4i32_v3i32__5_5_1_u(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: v_mov_b32_e32 v4, v1
-; GFX942-NEXT: global_store_dwordx4 v5, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -654,15 +624,14 @@ define void @v_shuffle_v4i32_v3i32__5_5_2_u(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -670,15 +639,14 @@ define void @v_shuffle_v4i32_v3i32__5_5_2_u(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v6
-; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -686,16 +654,15 @@ define void @v_shuffle_v4i32_v3i32__5_5_2_u(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v6
-; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v5, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -711,40 +678,38 @@ define void @v_shuffle_v4i32_v3i32__5_5_3_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_3_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v2
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_3_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v2
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: global_store_dwordx4 v5, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -758,42 +723,40 @@ define void @v_shuffle_v4i32_v3i32__5_5_4_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_4_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_4_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v2
-; GFX90A-NEXT: v_mov_b32_e32 v6, v1
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_4_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v2
-; GFX942-NEXT: v_mov_b32_e32 v6, v1
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: global_store_dwordx4 v5, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -807,39 +770,40 @@ define void @v_shuffle_v4i32_v3i32__5_5_5_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_5_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_5_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_5_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: global_store_dwordx4 v5, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -853,50 +817,51 @@ define void @v_shuffle_v4i32_v3i32__5_5_5_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_5_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[5:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_5_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v9, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[6:8]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v6
+; GFX90A-NEXT: global_store_dwordx4 v9, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_5_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v9, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[6:8]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v6
+; GFX942-NEXT: global_store_dwordx4 v9, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -911,15 +876,15 @@ define void @v_shuffle_v4i32_v3i32__5_5_5_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -927,15 +892,15 @@ define void @v_shuffle_v4i32_v3i32__5_5_5_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -943,16 +908,16 @@ define void @v_shuffle_v4i32_v3i32__5_5_5_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -967,15 +932,15 @@ define void @v_shuffle_v4i32_v3i32__5_5_5_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -983,16 +948,16 @@ define void @v_shuffle_v4i32_v3i32__5_5_5_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v6
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1000,16 +965,16 @@ define void @v_shuffle_v4i32_v3i32__5_5_5_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, v6
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -1025,43 +990,41 @@ define void @v_shuffle_v4i32_v3i32__5_5_5_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_5_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v2
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v5, v0
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_5_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v2
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, v0
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -1075,45 +1038,43 @@ define void @v_shuffle_v4i32_v3i32__5_5_5_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_5_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_5_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v2
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v1
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_5_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v2
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -1127,42 +1088,43 @@ define void @v_shuffle_v4i32_v3i32__5_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -1189,29 +1151,26 @@ define void @v_shuffle_v4i32_v3i32__u_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-LABEL: v_shuffle_v4i32_v3i32__u_0_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
+; GFX90A-NEXT: v_mov_b32_e32 v1, v2
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v3i32__u_0_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-NEXT: v_mov_b32_e32 v1, v2
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v0, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -1273,42 +1232,39 @@ define void @v_shuffle_v4i32_v3i32__1_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v3i32__1_0_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[4:5] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v3i32__1_0_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[4:5] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -1321,45 +1277,43 @@ define void @v_shuffle_v4i32_v3i32__2_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v3i32__2_0_0_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v3i32__2_0_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v3i32__2_0_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -1385,29 +1339,26 @@ define void @v_shuffle_v4i32_v3i32__3_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-LABEL: v_shuffle_v4i32_v3i32__3_0_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
+; GFX90A-NEXT: v_mov_b32_e32 v1, v2
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v3i32__3_0_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-NEXT: v_mov_b32_e32 v1, v2
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v0, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -1421,16 +1372,15 @@ define void @v_shuffle_v4i32_v3i32__4_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v3
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx4 v6, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1438,15 +1388,14 @@ define void @v_shuffle_v4i32_v3i32__4_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[4:5] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[2:3] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
@@ -1455,16 +1404,15 @@ define void @v_shuffle_v4i32_v3i32__4_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[4:5] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[2:3] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1480,16 +1428,15 @@ define void @v_shuffle_v4i32_v3i32__5_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1497,17 +1444,15 @@ define void @v_shuffle_v4i32_v3i32__5_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v0
-; GFX90A-NEXT: global_store_dwordx4 v5, v[6:9], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1515,17 +1460,15 @@ define void @v_shuffle_v4i32_v3i32__5_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v9, v0
-; GFX942-NEXT: global_store_dwordx4 v5, v[6:9], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -1540,49 +1483,44 @@ define void @v_shuffle_v4i32_v3i32__5_u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v3, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_u_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v5, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v3, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_u_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v5, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v3, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -1597,16 +1535,15 @@ define void @v_shuffle_v4i32_v3i32__5_1_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v5
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1614,17 +1551,15 @@ define void @v_shuffle_v4i32_v3i32__5_1_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v5
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v1
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v0
-; GFX90A-NEXT: global_store_dwordx4 v5, v[6:9], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1632,17 +1567,15 @@ define void @v_shuffle_v4i32_v3i32__5_1_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v9, v0
-; GFX942-NEXT: global_store_dwordx4 v5, v[6:9], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -1657,16 +1590,15 @@ define void @v_shuffle_v4i32_v3i32__5_2_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v6
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1676,15 +1608,13 @@ define void @v_shuffle_v4i32_v3i32__5_2_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v6
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v2
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1694,15 +1624,13 @@ define void @v_shuffle_v4i32_v3i32__5_2_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v6
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v2
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -1717,16 +1645,15 @@ define void @v_shuffle_v4i32_v3i32__5_3_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:6]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1734,17 +1661,15 @@ define void @v_shuffle_v4i32_v3i32__5_3_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v2
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v0
-; GFX90A-NEXT: global_store_dwordx4 v5, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1752,17 +1677,15 @@ define void @v_shuffle_v4i32_v3i32__5_3_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v2
-; GFX942-NEXT: v_mov_b32_e32 v9, v0
-; GFX942-NEXT: global_store_dwordx4 v5, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -1777,15 +1700,15 @@ define void @v_shuffle_v4i32_v3i32__5_4_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1793,16 +1716,15 @@ define void @v_shuffle_v4i32_v3i32__5_4_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1810,16 +1732,15 @@ define void @v_shuffle_v4i32_v3i32__5_4_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
+; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -1834,16 +1755,15 @@ define void @v_shuffle_v4i32_v3i32__5_5_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1851,17 +1771,15 @@ define void @v_shuffle_v4i32_v3i32__5_5_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v4
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v0
-; GFX90A-NEXT: global_store_dwordx4 v5, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1869,17 +1787,15 @@ define void @v_shuffle_v4i32_v3i32__5_5_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v4
-; GFX942-NEXT: v_mov_b32_e32 v9, v0
-; GFX942-NEXT: global_store_dwordx4 v5, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -1893,17 +1809,15 @@ define void @v_shuffle_v4i32_v3i32__5_5_u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_u_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[5:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1911,16 +1825,15 @@ define void @v_shuffle_v4i32_v3i32__5_5_u_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v4
-; GFX90A-NEXT: v_mov_b32_e32 v9, v0
-; GFX90A-NEXT: global_store_dwordx4 v5, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1928,16 +1841,15 @@ define void @v_shuffle_v4i32_v3i32__5_5_u_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v9, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v4
-; GFX942-NEXT: global_store_dwordx4 v5, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -1951,17 +1863,16 @@ define void @v_shuffle_v4i32_v3i32__5_5_1_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_1_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[5:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v6
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1969,16 +1880,15 @@ define void @v_shuffle_v4i32_v3i32__5_5_1_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v4
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[4:5], v[4:5] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1986,17 +1896,15 @@ define void @v_shuffle_v4i32_v3i32__5_5_1_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v4
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[4:5], v[4:5] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -2010,53 +1918,51 @@ define void @v_shuffle_v4i32_v3i32__5_5_2_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_2_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[5:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v7
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_2_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v9, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[6:8]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v8
; GFX90A-NEXT: v_mov_b32_e32 v5, v6
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v9, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_2_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v9, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[6:8]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v8
; GFX942-NEXT: v_mov_b32_e32 v5, v6
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v9, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -2070,53 +1976,51 @@ define void @v_shuffle_v4i32_v3i32__5_5_3_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_3_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:6]
+; GFX900-NEXT: ; def v[5:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_3_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v9, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[6:8]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v4
-; GFX90A-NEXT: v_mov_b32_e32 v8, v2
-; GFX90A-NEXT: v_mov_b32_e32 v9, v0
-; GFX90A-NEXT: global_store_dwordx4 v5, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v6
+; GFX90A-NEXT: global_store_dwordx4 v9, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_3_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v9, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[6:8]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v9, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v4
-; GFX942-NEXT: v_mov_b32_e32 v8, v2
-; GFX942-NEXT: global_store_dwordx4 v5, v[6:9], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v6
+; GFX942-NEXT: global_store_dwordx4 v9, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -2130,17 +2034,16 @@ define void @v_shuffle_v4i32_v3i32__5_5_4_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_4_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:6]
+; GFX900-NEXT: ; def v[5:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2148,16 +2051,15 @@ define void @v_shuffle_v4i32_v3i32__5_5_4_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v4
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[0:1], v[4:5] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2165,17 +2067,16 @@ define void @v_shuffle_v4i32_v3i32__5_5_4_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v4
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[0:1], v[4:5] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -2282,11 +2183,11 @@ define void @v_shuffle_v4i32_v3i32__1_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v1
; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2327,42 +2228,43 @@ define void @v_shuffle_v4i32_v3i32__2_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v3i32__2_1_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v3i32__2_1_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v3i32__2_1_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -2421,16 +2323,15 @@ define void @v_shuffle_v4i32_v3i32__4_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v3
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2480,16 +2381,15 @@ define void @v_shuffle_v4i32_v3i32__5_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2497,16 +2397,15 @@ define void @v_shuffle_v4i32_v3i32__5_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2514,17 +2413,15 @@ define void @v_shuffle_v4i32_v3i32__5_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -2539,15 +2436,14 @@ define void @v_shuffle_v4i32_v3i32__5_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2555,16 +2451,14 @@ define void @v_shuffle_v4i32_v3i32__5_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v3, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2572,17 +2466,14 @@ define void @v_shuffle_v4i32_v3i32__5_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v3, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -2597,15 +2488,14 @@ define void @v_shuffle_v4i32_v3i32__5_0_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2613,17 +2503,15 @@ define void @v_shuffle_v4i32_v3i32__5_0_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v1
-; GFX90A-NEXT: v_mov_b32_e32 v9, v1
-; GFX90A-NEXT: global_store_dwordx4 v5, v[6:9], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2631,17 +2519,15 @@ define void @v_shuffle_v4i32_v3i32__5_0_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v8, v1
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
-; GFX942-NEXT: global_store_dwordx4 v5, v[6:9], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -2656,16 +2542,15 @@ define void @v_shuffle_v4i32_v3i32__5_2_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v5
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:6]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2675,15 +2560,13 @@ define void @v_shuffle_v4i32_v3i32__5_2_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v6
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v2
-; GFX90A-NEXT: v_mov_b32_e32 v6, v1
-; GFX90A-NEXT: v_mov_b32_e32 v7, v1
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2693,15 +2576,13 @@ define void @v_shuffle_v4i32_v3i32__5_2_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v6
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v2
-; GFX942-NEXT: v_mov_b32_e32 v6, v1
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -2718,14 +2599,13 @@ define void @v_shuffle_v4i32_v3i32__5_3_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
-; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2733,17 +2613,15 @@ define void @v_shuffle_v4i32_v3i32__5_3_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v2
-; GFX90A-NEXT: v_mov_b32_e32 v8, v1
-; GFX90A-NEXT: v_mov_b32_e32 v9, v1
-; GFX90A-NEXT: global_store_dwordx4 v5, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2751,17 +2629,15 @@ define void @v_shuffle_v4i32_v3i32__5_3_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, v1
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v2
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
-; GFX942-NEXT: global_store_dwordx4 v5, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -2776,15 +2652,15 @@ define void @v_shuffle_v4i32_v3i32__5_4_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2792,15 +2668,15 @@ define void @v_shuffle_v4i32_v3i32__5_4_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2808,16 +2684,15 @@ define void @v_shuffle_v4i32_v3i32__5_4_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
+; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -2832,16 +2707,15 @@ define void @v_shuffle_v4i32_v3i32__5_5_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2849,17 +2723,15 @@ define void @v_shuffle_v4i32_v3i32__5_5_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v4
-; GFX90A-NEXT: v_mov_b32_e32 v8, v1
-; GFX90A-NEXT: v_mov_b32_e32 v9, v1
-; GFX90A-NEXT: global_store_dwordx4 v5, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2867,17 +2739,15 @@ define void @v_shuffle_v4i32_v3i32__5_5_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, v1
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v4
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
-; GFX942-NEXT: global_store_dwordx4 v5, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -2891,51 +2761,46 @@ define void @v_shuffle_v4i32_v3i32__5_5_u_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_u_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_u_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v4
-; GFX90A-NEXT: v_mov_b32_e32 v9, v1
-; GFX90A-NEXT: global_store_dwordx4 v5, v[6:9], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_u_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v4
-; GFX942-NEXT: global_store_dwordx4 v5, v[6:9], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -2949,52 +2814,46 @@ define void @v_shuffle_v4i32_v3i32__5_5_0_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_0_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_0_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v4
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v1
-; GFX90A-NEXT: global_store_dwordx4 v5, v[6:9], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_0_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v4
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
-; GFX942-NEXT: global_store_dwordx4 v5, v[6:9], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -3009,16 +2868,15 @@ define void @v_shuffle_v4i32_v3i32__5_5_2_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v6
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3028,15 +2886,13 @@ define void @v_shuffle_v4i32_v3i32__5_5_2_1(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v6
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v1
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3046,15 +2902,13 @@ define void @v_shuffle_v4i32_v3i32__5_5_2_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v6
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -3069,16 +2923,15 @@ define void @v_shuffle_v4i32_v3i32__5_5_3_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:6]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3086,17 +2939,15 @@ define void @v_shuffle_v4i32_v3i32__5_5_3_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v4
-; GFX90A-NEXT: v_mov_b32_e32 v8, v2
-; GFX90A-NEXT: v_mov_b32_e32 v9, v1
-; GFX90A-NEXT: global_store_dwordx4 v5, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3104,17 +2955,16 @@ define void @v_shuffle_v4i32_v3i32__5_5_3_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v4
-; GFX942-NEXT: v_mov_b32_e32 v8, v2
-; GFX942-NEXT: global_store_dwordx4 v5, v[6:9], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -3129,16 +2979,15 @@ define void @v_shuffle_v4i32_v3i32__5_5_4_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3146,17 +2995,15 @@ define void @v_shuffle_v4i32_v3i32__5_5_4_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v4
-; GFX90A-NEXT: v_mov_b32_e32 v8, v3
-; GFX90A-NEXT: v_mov_b32_e32 v9, v1
-; GFX90A-NEXT: global_store_dwordx4 v5, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3164,17 +3011,16 @@ define void @v_shuffle_v4i32_v3i32__5_5_4_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v4
-; GFX942-NEXT: v_mov_b32_e32 v8, v3
-; GFX942-NEXT: global_store_dwordx4 v5, v[6:9], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -3188,13 +3034,13 @@ define void @v_shuffle_v4i32_v3i32__u_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v3i32__u_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3278,14 +3124,13 @@ define void @v_shuffle_v4i32_v3i32__1_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v3i32__1_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3324,42 +3169,43 @@ define void @v_shuffle_v4i32_v3i32__2_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v3i32__2_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v3i32__2_2_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v3i32__2_2_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -3372,13 +3218,13 @@ define void @v_shuffle_v4i32_v3i32__3_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v3i32__3_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3418,16 +3264,15 @@ define void @v_shuffle_v4i32_v3i32__4_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[1:3]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v3
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3474,17 +3319,16 @@ define void @v_shuffle_v4i32_v3i32__5_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[1:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3492,16 +3336,15 @@ define void @v_shuffle_v4i32_v3i32__5_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3509,16 +3352,15 @@ define void @v_shuffle_v4i32_v3i32__5_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -3532,48 +3374,46 @@ define void @v_shuffle_v4i32_v3i32__5_u_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_u_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_u_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_u_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -3588,15 +3428,14 @@ define void @v_shuffle_v4i32_v3i32__5_0_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v5
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:6]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3606,15 +3445,14 @@ define void @v_shuffle_v4i32_v3i32__5_0_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_mov_b32_e32 v4, v6
+; GFX90A-NEXT: v_mov_b32_e32 v5, v6
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v2
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3624,15 +3462,14 @@ define void @v_shuffle_v4i32_v3i32__5_0_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_mov_b32_e32 v4, v6
+; GFX942-NEXT: v_mov_b32_e32 v5, v6
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v2
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -3646,48 +3483,46 @@ define void @v_shuffle_v4i32_v3i32__5_1_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_1_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_1_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_1_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -3704,14 +3539,13 @@ define void @v_shuffle_v4i32_v3i32__5_3_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3719,16 +3553,15 @@ define void @v_shuffle_v4i32_v3i32__5_3_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3736,16 +3569,15 @@ define void @v_shuffle_v4i32_v3i32__5_3_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v4
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -3760,15 +3592,15 @@ define void @v_shuffle_v4i32_v3i32__5_4_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3778,14 +3610,13 @@ define void @v_shuffle_v4i32_v3i32__5_4_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3795,14 +3626,13 @@ define void @v_shuffle_v4i32_v3i32__5_4_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
+; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -3817,16 +3647,15 @@ define void @v_shuffle_v4i32_v3i32__5_5_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3834,16 +3663,15 @@ define void @v_shuffle_v4i32_v3i32__5_5_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v6
; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3851,16 +3679,15 @@ define void @v_shuffle_v4i32_v3i32__5_5_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v6
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -3875,15 +3702,14 @@ define void @v_shuffle_v4i32_v3i32__5_5_u_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3891,16 +3717,15 @@ define void @v_shuffle_v4i32_v3i32__5_5_u_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v6
-; GFX90A-NEXT: v_mov_b32_e32 v7, v2
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3908,16 +3733,15 @@ define void @v_shuffle_v4i32_v3i32__5_5_u_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v6
-; GFX942-NEXT: v_mov_b32_e32 v7, v2
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -3932,16 +3756,15 @@ define void @v_shuffle_v4i32_v3i32__5_5_0_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[5:7]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v6
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3951,15 +3774,13 @@ define void @v_shuffle_v4i32_v3i32__5_5_0_2(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
; GFX90A-NEXT: v_mov_b32_e32 v5, v6
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: v_mov_b32_e32 v7, v2
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3969,15 +3790,13 @@ define void @v_shuffle_v4i32_v3i32__5_5_0_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
; GFX942-NEXT: v_mov_b32_e32 v5, v6
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v2
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -3992,15 +3811,14 @@ define void @v_shuffle_v4i32_v3i32__5_5_1_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:6]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4008,16 +3826,15 @@ define void @v_shuffle_v4i32_v3i32__5_5_1_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[2:3] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v6
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[4:5] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4025,16 +3842,16 @@ define void @v_shuffle_v4i32_v3i32__5_5_1_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[2:3] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v6
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[4:5] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -4049,53 +3866,49 @@ define void @v_shuffle_v4i32_v3i32__5_5_3_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:6]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_3_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, v6
-; GFX90A-NEXT: v_mov_b32_e32 v9, v6
-; GFX90A-NEXT: v_mov_b32_e32 v10, v4
-; GFX90A-NEXT: v_mov_b32_e32 v11, v2
-; GFX90A-NEXT: global_store_dwordx4 v3, v[8:11], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v6
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_3_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v6
-; GFX942-NEXT: v_mov_b32_e32 v9, v6
-; GFX942-NEXT: v_mov_b32_e32 v10, v4
-; GFX942-NEXT: v_mov_b32_e32 v11, v2
-; GFX942-NEXT: global_store_dwordx4 v3, v[8:11], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, v6
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -4110,16 +3923,15 @@ define void @v_shuffle_v4i32_v3i32__5_5_4_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4127,16 +3939,15 @@ define void @v_shuffle_v4i32_v3i32__5_5_4_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[2:3] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v6
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[0:1], v[4:5] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4144,17 +3955,16 @@ define void @v_shuffle_v4i32_v3i32__5_5_4_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[2:3] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v6
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[0:1], v[4:5] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -4218,12 +4028,11 @@ define void @v_shuffle_v4i32_v3i32__1_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v3i32__1_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v3, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4260,36 +4069,33 @@ define void @v_shuffle_v4i32_v3i32__2_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v3i32__2_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v3, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v3i32__2_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v3, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v3i32__2_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v3, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -4314,42 +4120,39 @@ define void @v_shuffle_v4i32_v3i32__4_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v3i32__4_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[4:5] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v3i32__4_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[4:5] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -4363,45 +4166,43 @@ define void @v_shuffle_v4i32_v3i32__5_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -4415,14 +4216,13 @@ define void @v_shuffle_v4i32_v3i32__5_u_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_u_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v3, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4464,15 +4264,15 @@ define void @v_shuffle_v4i32_v3i32__5_0_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4480,17 +4280,16 @@ define void @v_shuffle_v4i32_v3i32__5_0_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v2
-; GFX90A-NEXT: v_mov_b32_e32 v9, v2
-; GFX90A-NEXT: global_store_dwordx4 v5, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v0
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4498,17 +4297,16 @@ define void @v_shuffle_v4i32_v3i32__5_0_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v8, v2
-; GFX942-NEXT: v_mov_b32_e32 v9, v2
-; GFX942-NEXT: global_store_dwordx4 v5, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v0
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -4523,15 +4321,15 @@ define void @v_shuffle_v4i32_v3i32__5_1_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4539,16 +4337,15 @@ define void @v_shuffle_v4i32_v3i32__5_1_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v0
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4556,17 +4353,16 @@ define void @v_shuffle_v4i32_v3i32__5_1_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v0
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -4581,51 +4377,49 @@ define void @v_shuffle_v4i32_v3i32__5_2_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[1:3]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_2_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, v2
-; GFX90A-NEXT: v_mov_b32_e32 v8, v4
-; GFX90A-NEXT: v_mov_b32_e32 v9, v4
-; GFX90A-NEXT: global_store_dwordx4 v3, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v0
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_2_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v7, v2
-; GFX942-NEXT: v_mov_b32_e32 v8, v4
-; GFX942-NEXT: v_mov_b32_e32 v9, v4
-; GFX942-NEXT: global_store_dwordx4 v3, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v0
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -4641,43 +4435,41 @@ define void @v_shuffle_v4i32_v3i32__5_4_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_4_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v0
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_4_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v0
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -4693,43 +4485,41 @@ define void @v_shuffle_v4i32_v3i32__5_5_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v2
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v0
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v2
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v0
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -4743,43 +4533,40 @@ define void @v_shuffle_v4i32_v3i32__5_5_u_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_u_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_u_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v0
+; GFX90A-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_u_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v0
+; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -4794,15 +4581,15 @@ define void @v_shuffle_v4i32_v3i32__5_5_0_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4810,17 +4597,15 @@ define void @v_shuffle_v4i32_v3i32__5_5_0_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v4
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v2
-; GFX90A-NEXT: global_store_dwordx4 v5, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v0
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4828,17 +4613,16 @@ define void @v_shuffle_v4i32_v3i32__5_5_0_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v4
-; GFX942-NEXT: v_mov_b32_e32 v9, v2
-; GFX942-NEXT: global_store_dwordx4 v5, v[6:9], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v0
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -4853,15 +4637,15 @@ define void @v_shuffle_v4i32_v3i32__5_5_1_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4869,16 +4653,15 @@ define void @v_shuffle_v4i32_v3i32__5_5_1_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[2:3] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v4
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[0:1] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4886,17 +4669,16 @@ define void @v_shuffle_v4i32_v3i32__5_5_1_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[2:3] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v4
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[0:1] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -4911,15 +4693,15 @@ define void @v_shuffle_v4i32_v3i32__5_5_2_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4927,16 +4709,15 @@ define void @v_shuffle_v4i32_v3i32__5_5_2_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v0
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4944,17 +4725,16 @@ define void @v_shuffle_v4i32_v3i32__5_5_2_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v0
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -4970,40 +4750,39 @@ define void @v_shuffle_v4i32_v3i32__5_5_4_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_4_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[2:3] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v4
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[0:1], v[0:1] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_4_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[2:3] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v4
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[0:1], v[0:1] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -5120,16 +4899,15 @@ define void @v_shuffle_v4i32_v3i32__1_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v3
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5179,16 +4957,15 @@ define void @v_shuffle_v4i32_v3i32__2_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5196,16 +4973,15 @@ define void @v_shuffle_v4i32_v3i32__2_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v5
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5213,17 +4989,15 @@ define void @v_shuffle_v4i32_v3i32__2_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -5286,11 +5060,11 @@ define void @v_shuffle_v4i32_v3i32__4_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v1
; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5332,42 +5106,43 @@ define void @v_shuffle_v4i32_v3i32__5_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_4_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_4_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_4_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -5381,13 +5156,13 @@ define void @v_shuffle_v4i32_v3i32__5_u_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_u_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v3, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5429,15 +5204,15 @@ define void @v_shuffle_v4i32_v3i32__5_0_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5445,17 +5220,16 @@ define void @v_shuffle_v4i32_v3i32__5_0_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v3
-; GFX90A-NEXT: v_mov_b32_e32 v9, v3
-; GFX90A-NEXT: global_store_dwordx4 v5, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5463,17 +5237,16 @@ define void @v_shuffle_v4i32_v3i32__5_0_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v8, v3
-; GFX942-NEXT: v_mov_b32_e32 v9, v3
-; GFX942-NEXT: global_store_dwordx4 v5, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -5488,15 +5261,15 @@ define void @v_shuffle_v4i32_v3i32__5_1_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5504,15 +5277,15 @@ define void @v_shuffle_v4i32_v3i32__5_1_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5520,16 +5293,16 @@ define void @v_shuffle_v4i32_v3i32__5_1_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -5544,51 +5317,49 @@ define void @v_shuffle_v4i32_v3i32__5_2_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[1:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_2_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, v2
-; GFX90A-NEXT: v_mov_b32_e32 v8, v5
-; GFX90A-NEXT: v_mov_b32_e32 v9, v5
-; GFX90A-NEXT: global_store_dwordx4 v3, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_2_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v7, v2
-; GFX942-NEXT: v_mov_b32_e32 v8, v5
-; GFX942-NEXT: v_mov_b32_e32 v9, v5
-; GFX942-NEXT: global_store_dwordx4 v3, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -5602,44 +5373,43 @@ define void @v_shuffle_v4i32_v3i32__5_3_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_3_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_3_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v1
-; GFX90A-NEXT: v_mov_b32_e32 v7, v1
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_3_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v1
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -5653,45 +5423,43 @@ define void @v_shuffle_v4i32_v3i32__5_5_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v2
-; GFX90A-NEXT: v_mov_b32_e32 v6, v1
-; GFX90A-NEXT: v_mov_b32_e32 v7, v1
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v2
-; GFX942-NEXT: v_mov_b32_e32 v6, v1
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -5705,43 +5473,40 @@ define void @v_shuffle_v4i32_v3i32__5_5_u_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_u_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_u_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v1
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_u_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -5756,16 +5521,15 @@ define void @v_shuffle_v4i32_v3i32__5_5_0_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5773,17 +5537,15 @@ define void @v_shuffle_v4i32_v3i32__5_5_0_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v4
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v3
-; GFX90A-NEXT: global_store_dwordx4 v5, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5791,17 +5553,16 @@ define void @v_shuffle_v4i32_v3i32__5_5_0_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v4
-; GFX942-NEXT: v_mov_b32_e32 v9, v3
-; GFX942-NEXT: global_store_dwordx4 v5, v[6:9], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -5816,16 +5577,15 @@ define void @v_shuffle_v4i32_v3i32__5_5_1_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5833,17 +5593,16 @@ define void @v_shuffle_v4i32_v3i32__5_5_1_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v4
-; GFX90A-NEXT: v_mov_b32_e32 v8, v1
-; GFX90A-NEXT: v_mov_b32_e32 v9, v3
-; GFX90A-NEXT: global_store_dwordx4 v5, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5851,17 +5610,16 @@ define void @v_shuffle_v4i32_v3i32__5_5_1_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, v1
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v4
-; GFX942-NEXT: v_mov_b32_e32 v9, v3
-; GFX942-NEXT: global_store_dwordx4 v5, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -5876,16 +5634,15 @@ define void @v_shuffle_v4i32_v3i32__5_5_2_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5893,16 +5650,15 @@ define void @v_shuffle_v4i32_v3i32__5_5_2_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5910,17 +5666,16 @@ define void @v_shuffle_v4i32_v3i32__5_5_2_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -5934,44 +5689,43 @@ define void @v_shuffle_v4i32_v3i32__5_5_3_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_3_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_3_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v2
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: v_mov_b32_e32 v7, v1
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_3_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v2
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -5985,13 +5739,13 @@ define void @v_shuffle_v4i32_v3i32__u_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v3i32__u_5_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6090,16 +5844,15 @@ define void @v_shuffle_v4i32_v3i32__1_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v4
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6147,16 +5900,15 @@ define void @v_shuffle_v4i32_v3i32__2_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v5
+; GFX900-NEXT: v_mov_b32_e32 v4, v5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6164,16 +5916,16 @@ define void @v_shuffle_v4i32_v3i32__2_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v6
+; GFX90A-NEXT: v_mov_b32_e32 v4, v6
+; GFX90A-NEXT: v_mov_b32_e32 v5, v6
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6181,17 +5933,16 @@ define void @v_shuffle_v4i32_v3i32__2_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v6
+; GFX942-NEXT: v_mov_b32_e32 v4, v6
+; GFX942-NEXT: v_mov_b32_e32 v5, v6
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -6251,14 +6002,13 @@ define void @v_shuffle_v4i32_v3i32__4_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v3i32__4_5_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6298,39 +6048,40 @@ define void @v_shuffle_v4i32_v3i32__5_u_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_u_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v3, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_u_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v2
+; GFX90A-NEXT: global_store_dwordx4 v3, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_u_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v2
+; GFX942-NEXT: global_store_dwordx4 v3, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -6345,16 +6096,15 @@ define void @v_shuffle_v4i32_v3i32__5_0_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6362,16 +6112,16 @@ define void @v_shuffle_v4i32_v3i32__5_0_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v2
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6379,16 +6129,16 @@ define void @v_shuffle_v4i32_v3i32__5_0_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v2
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -6403,16 +6153,15 @@ define void @v_shuffle_v4i32_v3i32__5_1_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6420,16 +6169,15 @@ define void @v_shuffle_v4i32_v3i32__5_1_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6437,17 +6185,16 @@ define void @v_shuffle_v4i32_v3i32__5_1_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -6464,14 +6211,13 @@ define void @v_shuffle_v4i32_v3i32__5_2_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[1:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6481,14 +6227,14 @@ define void @v_shuffle_v4i32_v3i32__5_2_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6498,15 +6244,14 @@ define void @v_shuffle_v4i32_v3i32__5_2_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -6520,44 +6265,43 @@ define void @v_shuffle_v4i32_v3i32__5_3_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_3_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_3_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v2
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v5, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_3_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v2
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -6571,39 +6315,43 @@ define void @v_shuffle_v4i32_v3i32__5_4_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_4_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_4_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_4_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -6617,42 +6365,40 @@ define void @v_shuffle_v4i32_v3i32__5_5_u_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_u_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_u_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
; GFX90A-NEXT: v_mov_b32_e32 v5, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v2
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_u_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
; GFX942-NEXT: v_mov_b32_e32 v5, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v2
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -6667,16 +6413,15 @@ define void @v_shuffle_v4i32_v3i32__5_5_0_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:6]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6684,17 +6429,15 @@ define void @v_shuffle_v4i32_v3i32__5_5_0_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v4
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v4
-; GFX90A-NEXT: global_store_dwordx4 v5, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v2
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6702,17 +6445,16 @@ define void @v_shuffle_v4i32_v3i32__5_5_0_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v4
-; GFX942-NEXT: v_mov_b32_e32 v9, v4
-; GFX942-NEXT: global_store_dwordx4 v5, v[6:9], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v2
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -6727,16 +6469,15 @@ define void @v_shuffle_v4i32_v3i32__5_5_1_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6746,14 +6487,13 @@ define void @v_shuffle_v4i32_v3i32__5_5_1_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_pk_mov_b32 v[6:7], v[2:3], v[2:3] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v2
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6763,15 +6503,14 @@ define void @v_shuffle_v4i32_v3i32__5_5_1_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[6:7], v[2:3], v[2:3] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v2
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -6786,16 +6525,15 @@ define void @v_shuffle_v4i32_v3i32__5_5_2_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6803,16 +6541,15 @@ define void @v_shuffle_v4i32_v3i32__5_5_2_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v6
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6820,17 +6557,16 @@ define void @v_shuffle_v4i32_v3i32__5_5_2_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v6
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -6844,45 +6580,43 @@ define void @v_shuffle_v4i32_v3i32__5_5_3_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_3_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_3_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
; GFX90A-NEXT: v_mov_b32_e32 v5, v2
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: v_mov_b32_e32 v7, v2
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_3_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
; GFX942-NEXT: v_mov_b32_e32 v5, v2
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v2
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
@@ -6896,42 +6630,41 @@ define void @v_shuffle_v4i32_v3i32__5_5_4_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_4_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_4_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_pk_mov_b32 v[6:7], v[0:1], v[2:3] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v2
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[0:1], v[2:3] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v3i32__5_5_4_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[6:7], v[0:1], v[2:3] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v2
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[0:1], v[2:3] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i32> asm "; def $0", "=v"()
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4i32.v4i32.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4i32.v4i32.ll
index ee2f94b90ffa9..c7d7bf9fa1623 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4i32.v4i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4i32.v4i32.ll
@@ -58,12 +58,11 @@ define void @v_shuffle_v4i32_v4i32__1_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__1_u_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v4, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -100,36 +99,33 @@ define void @v_shuffle_v4i32_v4i32__2_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__2_u_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v4i32__2_u_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v4i32__2_u_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -142,12 +138,11 @@ define void @v_shuffle_v4i32_v4i32__3_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__3_u_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v4, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -195,12 +190,11 @@ define void @v_shuffle_v4i32_v4i32__5_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__5_u_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v4, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -238,36 +232,33 @@ define void @v_shuffle_v4i32_v4i32__6_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__6_u_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v4i32__6_u_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v4i32__6_u_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -281,12 +272,11 @@ define void @v_shuffle_v4i32_v4i32__7_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_u_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v4, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -324,16 +314,14 @@ define void @v_shuffle_v4i32_v4i32__7_0_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_0_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -378,15 +366,14 @@ define void @v_shuffle_v4i32_v4i32__7_1_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_1_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -431,16 +418,14 @@ define void @v_shuffle_v4i32_v4i32__7_2_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_2_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -485,16 +470,14 @@ define void @v_shuffle_v4i32_v4i32__7_3_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_3_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v5, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -508,9 +491,8 @@ define void @v_shuffle_v4i32_v4i32__7_3_u_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v7
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -524,9 +506,9 @@ define void @v_shuffle_v4i32_v4i32__7_3_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v7
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -540,13 +522,12 @@ define void @v_shuffle_v4i32_v4i32__7_4_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_4_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: global_store_dwordx4 v4, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx4 v5, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -584,12 +565,12 @@ define void @v_shuffle_v4i32_v4i32__7_5_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_5_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx4 v5, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -627,13 +608,12 @@ define void @v_shuffle_v4i32_v4i32__7_6_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_6_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx4 v5, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -671,13 +651,12 @@ define void @v_shuffle_v4i32_v4i32__7_7_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_7_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx4 v5, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -688,9 +667,8 @@ define void @v_shuffle_v4i32_v4i32__7_7_u_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v3
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -701,9 +679,8 @@ define void @v_shuffle_v4i32_v4i32__7_7_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v3
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -717,17 +694,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_0_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_7_0_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v9, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -737,14 +712,12 @@ define void @v_shuffle_v4i32_v4i32__7_7_0_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -754,15 +727,12 @@ define void @v_shuffle_v4i32_v4i32__7_7_0_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -777,16 +747,14 @@ define void @v_shuffle_v4i32_v4i32__7_7_1_u(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -794,16 +762,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_1_u(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -811,17 +778,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_1_u(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -836,15 +801,14 @@ define void @v_shuffle_v4i32_v4i32__7_7_2_u(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -852,15 +816,14 @@ define void @v_shuffle_v4i32_v4i32__7_7_2_u(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -868,16 +831,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_2_u(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -892,16 +854,14 @@ define void @v_shuffle_v4i32_v4i32__7_7_3_u(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx4 v6, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -909,16 +869,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_3_u(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -926,16 +885,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_3_u(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -949,43 +907,39 @@ define void @v_shuffle_v4i32_v4i32__7_7_4_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_7_4_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx4 v4, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v6, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v4i32__7_7_4_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v3
-; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_4_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v3
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: global_store_dwordx4 v5, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -999,42 +953,39 @@ define void @v_shuffle_v4i32_v4i32__7_7_5_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_7_5_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v4, v1
-; GFX900-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v4i32__7_7_5_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v3
-; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: v_mov_b32_e32 v8, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_5_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v3
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: v_mov_b32_e32 v8, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: global_store_dwordx4 v5, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -1048,13 +999,13 @@ define void @v_shuffle_v4i32_v4i32__7_7_6_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_7_6_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v6, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1094,14 +1045,13 @@ define void @v_shuffle_v4i32_v4i32__7_7_7_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_7_7_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v6, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1111,11 +1061,10 @@ define void @v_shuffle_v4i32_v4i32__7_7_7_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v3
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1125,11 +1074,10 @@ define void @v_shuffle_v4i32_v4i32__7_7_7_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v3
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: global_store_dwordx4 v5, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -1143,18 +1091,16 @@ define void @v_shuffle_v4i32_v4i32__7_7_7_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_7_7_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[6:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v10, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1162,16 +1108,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_7_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[0:1] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v0, v5
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[4:5] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1179,17 +1124,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_7_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[0:1] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v0, v5
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[4:5] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -1204,17 +1147,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_7_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v5
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v9, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1222,17 +1163,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_7_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v5
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1240,18 +1179,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_7_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v5
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -1266,17 +1202,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_7_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v6
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v6
-; GFX900-NEXT: v_mov_b32_e32 v6, v2
-; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1284,16 +1218,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_7_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[6:7], v[2:3] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[4:5] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1301,17 +1234,16 @@ define void @v_shuffle_v4i32_v4i32__7_7_7_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[6:7], v[2:3] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[4:5] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -1326,16 +1258,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_7_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1343,16 +1274,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_7_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: v_mov_b32_e32 v2, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1360,17 +1290,16 @@ define void @v_shuffle_v4i32_v4i32__7_7_7_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: v_mov_b32_e32 v2, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -1384,14 +1313,14 @@ define void @v_shuffle_v4i32_v4i32__7_7_7_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_7_7_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1431,46 +1360,42 @@ define void @v_shuffle_v4i32_v4i32__7_7_7_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_7_7_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v4, v3
; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: v_mov_b32_e32 v6, v3
-; GFX900-NEXT: v_mov_b32_e32 v7, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v1
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v4i32__7_7_7_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v3
-; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: v_mov_b32_e32 v8, v3
-; GFX90A-NEXT: v_mov_b32_e32 v9, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_7_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v3
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: v_mov_b32_e32 v8, v3
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -1484,15 +1409,14 @@ define void @v_shuffle_v4i32_v4i32__7_7_7_6(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_7_7_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v4, v3
; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: v_mov_b32_e32 v6, v3
-; GFX900-NEXT: v_mov_b32_e32 v7, v2
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1532,14 +1456,14 @@ define void @v_shuffle_v4i32_v4i32__7_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_7_7_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v3
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1549,11 +1473,11 @@ define void @v_shuffle_v4i32_v4i32__7_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v3
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1563,11 +1487,11 @@ define void @v_shuffle_v4i32_v4i32__7_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v3
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -1582,13 +1506,12 @@ define void @v_shuffle_v4i32_v4i32__u_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1596,13 +1519,12 @@ define void @v_shuffle_v4i32_v4i32__u_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v0
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
+; GFX90A-NEXT: v_mov_b32_e32 v1, v2
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1610,13 +1532,12 @@ define void @v_shuffle_v4i32_v4i32__u_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v0
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-NEXT: v_mov_b32_e32 v1, v2
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v0, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -1692,13 +1613,12 @@ define void @v_shuffle_v4i32_v4i32__1_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1706,13 +1626,12 @@ define void @v_shuffle_v4i32_v4i32__1_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -1728,43 +1647,39 @@ define void @v_shuffle_v4i32_v4i32__2_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: v_mov_b32_e32 v3, v0
; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v4i32__2_0_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v0
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v0
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v4i32__2_0_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v9, v0
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v0
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -1777,15 +1692,14 @@ define void @v_shuffle_v4i32_v4i32__3_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__3_0_0_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1793,13 +1707,12 @@ define void @v_shuffle_v4i32_v4i32__3_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[2:3] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1807,13 +1720,12 @@ define void @v_shuffle_v4i32_v4i32__3_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[2:3] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -1827,13 +1739,12 @@ define void @v_shuffle_v4i32_v4i32__4_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1841,13 +1752,12 @@ define void @v_shuffle_v4i32_v4i32__4_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v0
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
+; GFX90A-NEXT: v_mov_b32_e32 v1, v2
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1855,13 +1765,12 @@ define void @v_shuffle_v4i32_v4i32__4_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v0
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-NEXT: v_mov_b32_e32 v1, v2
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v0, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -1877,15 +1786,13 @@ define void @v_shuffle_v4i32_v4i32__5_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v4
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
+; GFX900-NEXT: global_store_dwordx4 v8, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1893,16 +1800,15 @@ define void @v_shuffle_v4i32_v4i32__5_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[2:3] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1910,17 +1816,16 @@ define void @v_shuffle_v4i32_v4i32__5_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[2:3] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -1935,17 +1840,15 @@ define void @v_shuffle_v4i32_v4i32__6_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1953,17 +1856,15 @@ define void @v_shuffle_v4i32_v4i32__6_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1971,18 +1872,15 @@ define void @v_shuffle_v4i32_v4i32__6_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v0
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -1997,17 +1895,15 @@ define void @v_shuffle_v4i32_v4i32__7_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2015,16 +1911,15 @@ define void @v_shuffle_v4i32_v4i32__7_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[0:1] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[2:3] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2032,17 +1927,16 @@ define void @v_shuffle_v4i32_v4i32__7_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[0:1] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[2:3] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -2057,16 +1951,14 @@ define void @v_shuffle_v4i32_v4i32__7_u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v4, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2074,16 +1966,15 @@ define void @v_shuffle_v4i32_v4i32__7_u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v1, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v7
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v1, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2091,17 +1982,15 @@ define void @v_shuffle_v4i32_v4i32__7_u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v1, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v0, v7
+; GFX942-NEXT: global_store_dwordx4 v1, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -2116,17 +2005,15 @@ define void @v_shuffle_v4i32_v4i32__7_1_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v6
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v0
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v9, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2134,17 +2021,16 @@ define void @v_shuffle_v4i32_v4i32__7_1_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v7
+; GFX90A-NEXT: v_mov_b32_e32 v1, v3
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2152,18 +2038,16 @@ define void @v_shuffle_v4i32_v4i32__7_1_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: v_mov_b32_e32 v0, v7
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -2178,16 +2062,15 @@ define void @v_shuffle_v4i32_v4i32__7_2_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v7
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v7, v[1:4], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v9, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2195,16 +2078,15 @@ define void @v_shuffle_v4i32_v4i32__7_2_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[6:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[6:7], v[2:3] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[8:9], v[4:5] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2212,16 +2094,15 @@ define void @v_shuffle_v4i32_v4i32__7_2_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[6:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[6:7], v[2:3] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[8:9], v[4:5] op_sel:[1,0]
+; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -2236,17 +2117,15 @@ define void @v_shuffle_v4i32_v4i32__7_3_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v8
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v8, v[1:4], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v9, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2295,17 +2174,15 @@ define void @v_shuffle_v4i32_v4i32__7_4_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v0
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
+; GFX900-NEXT: global_store_dwordx4 v9, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2313,16 +2190,15 @@ define void @v_shuffle_v4i32_v4i32__7_4_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[2:3] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[4:5] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2330,17 +2206,15 @@ define void @v_shuffle_v4i32_v4i32__7_4_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[2:3] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[4:5] op_sel:[1,0]
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -2355,16 +2229,15 @@ define void @v_shuffle_v4i32_v4i32__7_5_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
+; GFX900-NEXT: global_store_dwordx4 v9, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2414,17 +2287,15 @@ define void @v_shuffle_v4i32_v4i32__7_6_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
+; GFX900-NEXT: global_store_dwordx4 v9, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2432,16 +2303,15 @@ define void @v_shuffle_v4i32_v4i32__7_6_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[4:5] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[6:7] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2449,17 +2319,15 @@ define void @v_shuffle_v4i32_v4i32__7_6_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[4:5] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[6:7] op_sel:[1,0]
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -2474,17 +2342,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
+; GFX900-NEXT: global_store_dwordx4 v9, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2492,17 +2358,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2510,18 +2374,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -2535,17 +2396,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_7_u_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[6:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v5, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2553,16 +2412,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_u_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2570,17 +2428,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_u_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -2594,18 +2450,16 @@ define void @v_shuffle_v4i32_v4i32__7_7_1_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_7_1_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[6:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v1
-; GFX900-NEXT: v_mov_b32_e32 v5, v0
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2613,16 +2467,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_1_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v0, v5
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[4:5], v[4:5] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2630,17 +2483,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_1_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v0, v5
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[4:5], v[4:5] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -2654,18 +2505,16 @@ define void @v_shuffle_v4i32_v4i32__7_7_2_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_7_2_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[6:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v6
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v2
-; GFX900-NEXT: v_mov_b32_e32 v6, v0
-; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v8
+; GFX900-NEXT: global_store_dwordx4 v10, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2673,17 +2522,16 @@ define void @v_shuffle_v4i32_v4i32__7_7_2_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[6:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v7
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v8
+; GFX90A-NEXT: v_mov_b32_e32 v5, v6
+; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2691,17 +2539,16 @@ define void @v_shuffle_v4i32_v4i32__7_7_2_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[6:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, v7
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v8
+; GFX942-NEXT: v_mov_b32_e32 v5, v6
+; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -2715,17 +2562,16 @@ define void @v_shuffle_v4i32_v4i32__7_7_3_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_7_3_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[6:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v7
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v8, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v9
+; GFX900-NEXT: global_store_dwordx4 v10, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2733,16 +2579,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_3_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[6:7], v[4:5] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2750,16 +2595,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_3_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[6:7], v[4:5] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -2773,18 +2617,16 @@ define void @v_shuffle_v4i32_v4i32__7_7_4_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_7_4_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[6:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: v_mov_b32_e32 v4, v1
-; GFX900-NEXT: v_mov_b32_e32 v5, v0
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v10, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2794,15 +2636,14 @@ define void @v_shuffle_v4i32_v4i32__7_7_4_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[6:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, v5
-; GFX90A-NEXT: v_mov_b32_e32 v9, v5
-; GFX90A-NEXT: v_mov_b32_e32 v10, v2
-; GFX90A-NEXT: v_mov_b32_e32 v11, v0
-; GFX90A-NEXT: global_store_dwordx4 v6, v[8:11], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v6
+; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2812,15 +2653,14 @@ define void @v_shuffle_v4i32_v4i32__7_7_4_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[6:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v11, v0
-; GFX942-NEXT: v_mov_b32_e32 v8, v5
-; GFX942-NEXT: v_mov_b32_e32 v9, v5
-; GFX942-NEXT: v_mov_b32_e32 v10, v2
-; GFX942-NEXT: global_store_dwordx4 v6, v[8:11], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v6
+; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -2834,17 +2674,16 @@ define void @v_shuffle_v4i32_v4i32__7_7_5_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_7_5_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[6:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: v_mov_b32_e32 v5, v2
-; GFX900-NEXT: v_mov_b32_e32 v6, v0
-; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v10, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2852,16 +2691,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_5_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v0, v5
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[0:1], v[4:5] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2869,17 +2707,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_5_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v0, v5
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[0:1], v[4:5] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -2893,17 +2729,16 @@ define void @v_shuffle_v4i32_v4i32__7_7_6_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_7_6_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[6:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v10, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3045,11 +2880,11 @@ define void @v_shuffle_v4i32_v4i32__1_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v1
; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3093,11 +2928,11 @@ define void @v_shuffle_v4i32_v4i32__2_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3107,11 +2942,11 @@ define void @v_shuffle_v4i32_v4i32__2_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3121,11 +2956,11 @@ define void @v_shuffle_v4i32_v4i32__2_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -3138,14 +2973,14 @@ define void @v_shuffle_v4i32_v4i32__3_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__3_1_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: v_mov_b32_e32 v6, v1
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3232,16 +3067,15 @@ define void @v_shuffle_v4i32_v4i32__5_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v4
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
+; GFX900-NEXT: global_store_dwordx4 v7, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3291,16 +3125,15 @@ define void @v_shuffle_v4i32_v4i32__6_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3308,16 +3141,15 @@ define void @v_shuffle_v4i32_v4i32__6_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v5
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3325,17 +3157,15 @@ define void @v_shuffle_v4i32_v4i32__6_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -3350,16 +3180,15 @@ define void @v_shuffle_v4i32_v4i32__7_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3409,16 +3238,14 @@ define void @v_shuffle_v4i32_v4i32__7_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3426,16 +3253,15 @@ define void @v_shuffle_v4i32_v4i32__7_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v1, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v5
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v7
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx4 v1, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3443,17 +3269,15 @@ define void @v_shuffle_v4i32_v4i32__7_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v1, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v5
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v0, v7
+; GFX942-NEXT: global_store_dwordx4 v1, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -3468,17 +3292,14 @@ define void @v_shuffle_v4i32_v4i32__7_0_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: v_mov_b32_e32 v4, v1
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3486,16 +3307,15 @@ define void @v_shuffle_v4i32_v4i32__7_0_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[0:1] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v4, v1
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[2:3] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3503,17 +3323,16 @@ define void @v_shuffle_v4i32_v4i32__7_0_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[0:1] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v4, v1
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[2:3] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -3528,17 +3347,15 @@ define void @v_shuffle_v4i32_v4i32__7_2_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v6
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v6
-; GFX900-NEXT: v_mov_b32_e32 v4, v2
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: v_mov_b32_e32 v6, v1
-; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3546,16 +3363,15 @@ define void @v_shuffle_v4i32_v4i32__7_2_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[6:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[6:7], v[2:3] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v4, v1
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[8:9], v[4:5] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3563,16 +3379,15 @@ define void @v_shuffle_v4i32_v4i32__7_2_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[6:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[6:7], v[2:3] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v4, v1
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[8:9], v[4:5] op_sel:[1,0]
+; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -3587,16 +3402,15 @@ define void @v_shuffle_v4i32_v4i32__7_3_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v7
-; GFX900-NEXT: v_mov_b32_e32 v4, v1
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v7
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3645,17 +3459,15 @@ define void @v_shuffle_v4i32_v4i32__7_4_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v2
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: v_mov_b32_e32 v6, v1
-; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3663,16 +3475,15 @@ define void @v_shuffle_v4i32_v4i32__7_4_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[2:3] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v4, v1
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[4:5] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3680,17 +3491,15 @@ define void @v_shuffle_v4i32_v4i32__7_4_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[2:3] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v4, v1
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[4:5] op_sel:[1,0]
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -3705,16 +3514,15 @@ define void @v_shuffle_v4i32_v4i32__7_5_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
; GFX900-NEXT: v_mov_b32_e32 v4, v1
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3764,17 +3572,15 @@ define void @v_shuffle_v4i32_v4i32__7_6_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: v_mov_b32_e32 v4, v1
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3782,16 +3588,15 @@ define void @v_shuffle_v4i32_v4i32__7_6_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[4:5] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v4, v1
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[6:7] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3799,17 +3604,15 @@ define void @v_shuffle_v4i32_v4i32__7_6_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[4:5] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v4, v1
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[6:7] op_sel:[1,0]
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -3824,17 +3627,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v1
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3842,17 +3643,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v1
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3860,18 +3659,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v1
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -3885,17 +3681,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_u_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_7_u_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v9, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3905,14 +3699,12 @@ define void @v_shuffle_v4i32_v4i32__7_7_u_1(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3922,15 +3714,12 @@ define void @v_shuffle_v4i32_v4i32__7_7_u_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -3944,18 +3733,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_0_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_7_0_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v9, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3965,15 +3751,12 @@ define void @v_shuffle_v4i32_v4i32__7_7_0_1(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3983,16 +3766,12 @@ define void @v_shuffle_v4i32_v4i32__7_7_0_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -4007,17 +3786,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_2_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v6
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v2
-; GFX900-NEXT: v_mov_b32_e32 v6, v1
-; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v7
+; GFX900-NEXT: global_store_dwordx4 v9, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4027,15 +3804,13 @@ define void @v_shuffle_v4i32_v4i32__7_7_2_1(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v7
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v6
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4045,15 +3820,13 @@ define void @v_shuffle_v4i32_v4i32__7_7_2_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, v7
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v6
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -4068,17 +3841,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_3_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v7
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: v_mov_b32_e32 v6, v3
-; GFX900-NEXT: v_mov_b32_e32 v7, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v8
+; GFX900-NEXT: global_store_dwordx4 v9, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4088,15 +3859,13 @@ define void @v_shuffle_v4i32_v4i32__7_7_3_1(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
; GFX90A-NEXT: v_mov_b32_e32 v4, v7
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: v_mov_b32_e32 v6, v3
-; GFX90A-NEXT: v_mov_b32_e32 v7, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4106,15 +3875,13 @@ define void @v_shuffle_v4i32_v4i32__7_7_3_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
; GFX942-NEXT: v_mov_b32_e32 v4, v7
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: v_mov_b32_e32 v6, v3
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -4129,17 +3896,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_4_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v5
-; GFX900-NEXT: v_mov_b32_e32 v5, v2
-; GFX900-NEXT: v_mov_b32_e32 v6, v1
-; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v9, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4147,17 +3912,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_4_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, v5
-; GFX90A-NEXT: v_mov_b32_e32 v9, v5
-; GFX90A-NEXT: v_mov_b32_e32 v10, v2
-; GFX90A-NEXT: v_mov_b32_e32 v11, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[8:11], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4165,17 +3928,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_4_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v11, v1
-; GFX942-NEXT: v_mov_b32_e32 v8, v5
-; GFX942-NEXT: v_mov_b32_e32 v9, v5
-; GFX942-NEXT: v_mov_b32_e32 v10, v2
-; GFX942-NEXT: global_store_dwordx4 v6, v[8:11], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -4190,16 +3951,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_5_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v5
-; GFX900-NEXT: v_mov_b32_e32 v6, v3
-; GFX900-NEXT: v_mov_b32_e32 v7, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v9, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4207,17 +3967,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_5_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, v5
-; GFX90A-NEXT: v_mov_b32_e32 v9, v5
-; GFX90A-NEXT: v_mov_b32_e32 v10, v3
-; GFX90A-NEXT: v_mov_b32_e32 v11, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[8:11], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4225,17 +3983,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_5_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v11, v1
-; GFX942-NEXT: v_mov_b32_e32 v8, v5
-; GFX942-NEXT: v_mov_b32_e32 v9, v5
-; GFX942-NEXT: v_mov_b32_e32 v10, v3
-; GFX942-NEXT: global_store_dwordx4 v6, v[8:11], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -4250,16 +4006,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_6_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v9, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4311,10 +4066,10 @@ define void @v_shuffle_v4i32_v4i32__u_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4401,11 +4156,10 @@ define void @v_shuffle_v4i32_v4i32__1_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4447,11 +4201,11 @@ define void @v_shuffle_v4i32_v4i32__2_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4461,11 +4215,11 @@ define void @v_shuffle_v4i32_v4i32__2_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4475,11 +4229,11 @@ define void @v_shuffle_v4i32_v4i32__2_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -4492,14 +4246,14 @@ define void @v_shuffle_v4i32_v4i32__3_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__3_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4541,10 +4295,10 @@ define void @v_shuffle_v4i32_v4i32__4_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4584,16 +4338,15 @@ define void @v_shuffle_v4i32_v4i32__5_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v4
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
+; GFX900-NEXT: global_store_dwordx4 v6, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4641,16 +4394,15 @@ define void @v_shuffle_v4i32_v4i32__6_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4658,16 +4410,15 @@ define void @v_shuffle_v4i32_v4i32__6_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4675,16 +4426,15 @@ define void @v_shuffle_v4i32_v4i32__6_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -4699,16 +4449,15 @@ define void @v_shuffle_v4i32_v4i32__7_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4756,19 +4505,18 @@ define void @v_shuffle_v4i32_v4i32__7_u_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
-; GFX900-NEXT: s_waitcnt vmcnt(0)
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: v_shuffle_v4i32_v4i32__7_u_2_2:
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: v_shuffle_v4i32_v4i32__7_u_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -4811,17 +4559,14 @@ define void @v_shuffle_v4i32_v4i32__7_0_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v5, v6
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v6
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v2
-; GFX900-NEXT: v_mov_b32_e32 v6, v2
-; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4868,15 +4613,14 @@ define void @v_shuffle_v4i32_v4i32__7_1_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4923,16 +4667,15 @@ define void @v_shuffle_v4i32_v4i32__7_3_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v6
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4981,16 +4724,15 @@ define void @v_shuffle_v4i32_v4i32__7_4_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5037,15 +4779,14 @@ define void @v_shuffle_v4i32_v4i32__7_5_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v2
-; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
@@ -5095,16 +4836,15 @@ define void @v_shuffle_v4i32_v4i32__7_6_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5151,16 +4891,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5168,16 +4907,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5185,16 +4923,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -5209,16 +4946,14 @@ define void @v_shuffle_v4i32_v4i32__7_7_u_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5226,16 +4961,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_u_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5243,16 +4977,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_u_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -5267,17 +5000,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_0_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v6
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v0
-; GFX900-NEXT: v_mov_b32_e32 v6, v2
-; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v7
+; GFX900-NEXT: global_store_dwordx4 v9, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5287,15 +5018,13 @@ define void @v_shuffle_v4i32_v4i32__7_7_0_2(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v7
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: v_mov_b32_e32 v7, v2
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v6
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5305,15 +5034,13 @@ define void @v_shuffle_v4i32_v4i32__7_7_0_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, v7
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v2
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v6
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -5328,17 +5055,14 @@ define void @v_shuffle_v4i32_v4i32__7_7_1_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v6
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: v_mov_b32_e32 v6, v2
-; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5346,16 +5070,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_1_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[2:3] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[4:5], v[6:7] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5363,16 +5086,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_1_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[2:3] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[4:5], v[6:7] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -5393,11 +5115,9 @@ define void @v_shuffle_v4i32_v4i32__7_7_3_2(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v7
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: v_mov_b32_e32 v6, v3
-; GFX900-NEXT: v_mov_b32_e32 v7, v2
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5405,16 +5125,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_3_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[2:3] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[4:5], v[4:5] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5422,16 +5141,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_3_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[2:3] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[4:5], v[4:5] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -5446,54 +5164,49 @@ define void @v_shuffle_v4i32_v4i32__7_7_4_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v6
-; GFX900-NEXT: v_mov_b32_e32 v6, v3
-; GFX900-NEXT: v_mov_b32_e32 v7, v2
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v4i32__7_7_4_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, v7
-; GFX90A-NEXT: v_mov_b32_e32 v11, v7
-; GFX90A-NEXT: v_mov_b32_e32 v12, v4
-; GFX90A-NEXT: v_mov_b32_e32 v13, v2
-; GFX90A-NEXT: global_store_dwordx4 v8, v[10:13], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v6
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_4_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v7
-; GFX942-NEXT: v_mov_b32_e32 v11, v7
-; GFX942-NEXT: v_mov_b32_e32 v12, v4
-; GFX942-NEXT: v_mov_b32_e32 v13, v2
-; GFX942-NEXT: global_store_dwordx4 v8, v[10:13], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v6
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -5508,16 +5221,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_5_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v9, 0
-; GFX900-NEXT: v_mov_b32_e32 v5, v6
-; GFX900-NEXT: v_mov_b32_e32 v7, v4
-; GFX900-NEXT: v_mov_b32_e32 v8, v2
-; GFX900-NEXT: global_store_dwordx4 v9, v[5:8], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5525,16 +5237,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_5_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[2:3] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[0:1], v[4:5] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5542,17 +5253,16 @@ define void @v_shuffle_v4i32_v4i32__7_7_5_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[2:3] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[0:1], v[4:5] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -5567,16 +5277,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_6_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v6
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v6, v2
-; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5624,39 +5333,40 @@ define void @v_shuffle_v4i32_v4i32__u_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__u_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v4i32__u_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v4i32__u_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -5717,11 +5427,10 @@ define void @v_shuffle_v4i32_v4i32__1_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5731,11 +5440,11 @@ define void @v_shuffle_v4i32_v4i32__1_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5745,11 +5454,11 @@ define void @v_shuffle_v4i32_v4i32__1_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -5762,42 +5471,40 @@ define void @v_shuffle_v4i32_v4i32__2_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__2_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v4i32__2_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v4i32__2_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -5810,14 +5517,14 @@ define void @v_shuffle_v4i32_v4i32__3_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__3_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v3
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5827,11 +5534,11 @@ define void @v_shuffle_v4i32_v4i32__3_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v3
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5841,11 +5548,11 @@ define void @v_shuffle_v4i32_v4i32__3_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v3
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -5858,39 +5565,40 @@ define void @v_shuffle_v4i32_v4i32__4_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__4_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v4i32__4_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v4i32__4_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -5904,16 +5612,15 @@ define void @v_shuffle_v4i32_v4i32__5_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v4
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5923,14 +5630,14 @@ define void @v_shuffle_v4i32_v4i32__5_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v5
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v5
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5940,14 +5647,14 @@ define void @v_shuffle_v4i32_v4i32__5_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, v5
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v5
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -5962,16 +5669,15 @@ define void @v_shuffle_v4i32_v4i32__6_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5979,16 +5685,15 @@ define void @v_shuffle_v4i32_v4i32__6_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v5
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5996,16 +5701,15 @@ define void @v_shuffle_v4i32_v4i32__6_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
+; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -6019,17 +5723,16 @@ define void @v_shuffle_v4i32_v4i32__7_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6039,14 +5742,14 @@ define void @v_shuffle_v4i32_v4i32__7_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v7
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6056,14 +5759,14 @@ define void @v_shuffle_v4i32_v4i32__7_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v7
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -6077,16 +5780,15 @@ define void @v_shuffle_v4i32_v4i32__7_u_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_u_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6136,14 +5838,12 @@ define void @v_shuffle_v4i32_v4i32__7_0_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v5, v7
+; GFX900-NEXT: v_mov_b32_e32 v6, v7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v7
-; GFX900-NEXT: v_mov_b32_e32 v5, v0
-; GFX900-NEXT: v_mov_b32_e32 v6, v3
-; GFX900-NEXT: v_mov_b32_e32 v7, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6190,15 +5890,14 @@ define void @v_shuffle_v4i32_v4i32__7_1_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v5, v6
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6244,17 +5943,15 @@ define void @v_shuffle_v4i32_v4i32__7_2_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_2_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6302,16 +5999,15 @@ define void @v_shuffle_v4i32_v4i32__7_4_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6358,16 +6054,15 @@ define void @v_shuffle_v4i32_v4i32__7_5_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v7
-; GFX900-NEXT: v_mov_b32_e32 v6, v3
-; GFX900-NEXT: v_mov_b32_e32 v7, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6416,16 +6111,15 @@ define void @v_shuffle_v4i32_v4i32__7_6_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6472,16 +6166,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6489,16 +6182,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6506,16 +6198,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -6530,15 +6221,14 @@ define void @v_shuffle_v4i32_v4i32__7_7_u_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6546,15 +6236,14 @@ define void @v_shuffle_v4i32_v4i32__7_7_u_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6562,16 +6251,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_u_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -6586,17 +6274,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_0_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v7
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: v_mov_b32_e32 v6, v0
-; GFX900-NEXT: v_mov_b32_e32 v7, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v8
+; GFX900-NEXT: global_store_dwordx4 v9, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6606,15 +6292,13 @@ define void @v_shuffle_v4i32_v4i32__7_7_0_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v7
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6624,15 +6308,13 @@ define void @v_shuffle_v4i32_v4i32__7_7_0_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, v7
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -6653,11 +6335,9 @@ define void @v_shuffle_v4i32_v4i32__7_7_1_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v7
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: v_mov_b32_e32 v6, v1
-; GFX900-NEXT: v_mov_b32_e32 v7, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v7
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6667,15 +6347,14 @@ define void @v_shuffle_v4i32_v4i32__7_7_1_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v7
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: v_mov_b32_e32 v6, v1
-; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6685,15 +6364,14 @@ define void @v_shuffle_v4i32_v4i32__7_7_1_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, v7
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: v_mov_b32_e32 v6, v1
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -6708,15 +6386,14 @@ define void @v_shuffle_v4i32_v4i32__7_7_2_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6724,15 +6401,14 @@ define void @v_shuffle_v4i32_v4i32__7_7_2_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6740,16 +6416,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_2_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -6764,16 +6439,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_4_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6781,16 +6455,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_4_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6798,17 +6471,16 @@ define void @v_shuffle_v4i32_v4i32__7_7_4_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -6823,16 +6495,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_5_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6840,16 +6511,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_5_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6857,17 +6527,16 @@ define void @v_shuffle_v4i32_v4i32__7_7_5_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -6882,16 +6551,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_6_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v7
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: v_mov_b32_e32 v7, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6989,12 +6657,11 @@ define void @v_shuffle_v4i32_v4i32__1_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__1_4_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v4, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7031,36 +6698,33 @@ define void @v_shuffle_v4i32_v4i32__2_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__2_4_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v4i32__2_4_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v4i32__2_4_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -7073,12 +6737,11 @@ define void @v_shuffle_v4i32_v4i32__3_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__3_4_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v4, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7141,13 +6804,12 @@ define void @v_shuffle_v4i32_v4i32__5_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7155,13 +6817,12 @@ define void @v_shuffle_v4i32_v4i32__5_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -7178,43 +6839,39 @@ define void @v_shuffle_v4i32_v4i32__6_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: v_mov_b32_e32 v3, v0
; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v4i32__6_4_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v0
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v0
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v4i32__6_4_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v9, v0
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v0
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -7228,15 +6885,14 @@ define void @v_shuffle_v4i32_v4i32__7_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_4_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7244,13 +6900,12 @@ define void @v_shuffle_v4i32_v4i32__7_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[2:3] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7258,13 +6913,12 @@ define void @v_shuffle_v4i32_v4i32__7_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[2:3] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -7278,43 +6932,39 @@ define void @v_shuffle_v4i32_v4i32__7_u_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_u_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: global_store_dwordx4 v4, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v4i32__7_u_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v3
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v0
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v1, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v1, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_u_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v3
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v9, v0
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v1, 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v1, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -7329,17 +6979,15 @@ define void @v_shuffle_v4i32_v4i32__7_0_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: v_mov_b32_e32 v4, v1
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7352,11 +7000,10 @@ define void @v_shuffle_v4i32_v4i32__7_0_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[4:5], v[0:1] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v2
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[0:1] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7366,15 +7013,14 @@ define void @v_shuffle_v4i32_v4i32__7_0_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[4:5], v[0:1] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v2
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[0:1] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -7389,15 +7035,15 @@ define void @v_shuffle_v4i32_v4i32__7_1_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7407,14 +7053,13 @@ define void @v_shuffle_v4i32_v4i32__7_1_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7424,15 +7069,14 @@ define void @v_shuffle_v4i32_v4i32__7_1_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -7447,16 +7091,15 @@ define void @v_shuffle_v4i32_v4i32__7_2_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7464,15 +7107,14 @@ define void @v_shuffle_v4i32_v4i32__7_2_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[2:3] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[6:7] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
@@ -7481,16 +7123,15 @@ define void @v_shuffle_v4i32_v4i32__7_2_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[2:3] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[6:7] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -7506,17 +7147,15 @@ define void @v_shuffle_v4i32_v4i32__7_3_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7526,15 +7165,13 @@ define void @v_shuffle_v4i32_v4i32__7_3_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v7
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7544,15 +7181,13 @@ define void @v_shuffle_v4i32_v4i32__7_3_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v7
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -7566,46 +7201,42 @@ define void @v_shuffle_v4i32_v4i32__7_5_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_5_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
; GFX900-NEXT: v_mov_b32_e32 v5, v0
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v4i32__7_5_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v3
-; GFX90A-NEXT: v_mov_b32_e32 v7, v1
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v0
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: v_mov_b32_e32 v1, v3
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_5_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v3
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v9, v0
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -7619,14 +7250,14 @@ define void @v_shuffle_v4i32_v4i32__7_6_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_6_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7634,13 +7265,12 @@ define void @v_shuffle_v4i32_v4i32__7_6_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[2:3] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[4:5] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7648,13 +7278,12 @@ define void @v_shuffle_v4i32_v4i32__7_6_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[2:3] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[4:5] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -7668,46 +7297,42 @@ define void @v_shuffle_v4i32_v4i32__7_7_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_7_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v4i32__7_7_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v3
-; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v0
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v0
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v3
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v9, v0
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v0
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -7721,14 +7346,13 @@ define void @v_shuffle_v4i32_v4i32__7_7_u_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_7_u_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: global_store_dwordx4 v5, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7769,17 +7393,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_0_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: global_store_dwordx4 v9, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7787,17 +7409,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_0_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, v5
-; GFX90A-NEXT: v_mov_b32_e32 v9, v5
-; GFX90A-NEXT: v_mov_b32_e32 v10, v0
-; GFX90A-NEXT: v_mov_b32_e32 v11, v2
-; GFX90A-NEXT: global_store_dwordx4 v6, v[8:11], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v0
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7805,17 +7425,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_0_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, v0
-; GFX942-NEXT: v_mov_b32_e32 v8, v5
-; GFX942-NEXT: v_mov_b32_e32 v9, v5
-; GFX942-NEXT: v_mov_b32_e32 v11, v2
-; GFX942-NEXT: global_store_dwordx4 v6, v[8:11], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v0
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -7830,17 +7448,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_1_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v5
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: v_mov_b32_e32 v6, v2
-; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7848,16 +7464,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_1_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[2:3] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v0, v5
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[4:5], v[0:1] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7865,17 +7480,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_1_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[2:3] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v0, v5
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[4:5], v[0:1] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -7890,15 +7503,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_2_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7906,16 +7519,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_2_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v0
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7923,17 +7535,16 @@ define void @v_shuffle_v4i32_v4i32__7_7_2_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v0
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -7948,17 +7559,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_3_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7966,16 +7575,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_3_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[4:5] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[4:5], v[0:1] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7983,17 +7591,16 @@ define void @v_shuffle_v4i32_v4i32__7_7_3_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[4:5] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[4:5], v[0:1] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -8007,14 +7614,14 @@ define void @v_shuffle_v4i32_v4i32__7_7_5_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_7_5_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v4, v1
-; GFX900-NEXT: v_mov_b32_e32 v5, v0
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -8054,15 +7661,14 @@ define void @v_shuffle_v4i32_v4i32__7_7_6_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_7_6_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v4, v3
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: v_mov_b32_e32 v6, v2
-; GFX900-NEXT: v_mov_b32_e32 v7, v0
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -8157,12 +7763,12 @@ define void @v_shuffle_v4i32_v4i32__0_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v1, v2
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -8216,10 +7822,9 @@ define void @v_shuffle_v4i32_v4i32__1_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: v_mov_b32_e32 v2, v3
; GFX900-NEXT: v_mov_b32_e32 v4, v3
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -8275,10 +7880,9 @@ define void @v_shuffle_v4i32_v4i32__2_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
; GFX900-NEXT: v_mov_b32_e32 v5, v4
-; GFX900-NEXT: v_mov_b32_e32 v6, v4
-; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -8286,16 +7890,15 @@ define void @v_shuffle_v4i32_v4i32__2_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v6, v5
-; GFX90A-NEXT: v_mov_b32_e32 v7, v5
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v5
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -8303,16 +7906,15 @@ define void @v_shuffle_v4i32_v4i32__2_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v5
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -8330,13 +7932,12 @@ define void @v_shuffle_v4i32_v4i32__3_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v5
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v3
-; GFX900-NEXT: v_mov_b32_e32 v6, v5
-; GFX900-NEXT: v_mov_b32_e32 v7, v5
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -8433,11 +8034,11 @@ define void @v_shuffle_v4i32_v4i32__5_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v1
; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -8482,11 +8083,11 @@ define void @v_shuffle_v4i32_v4i32__6_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -8496,11 +8097,11 @@ define void @v_shuffle_v4i32_v4i32__6_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -8510,11 +8111,11 @@ define void @v_shuffle_v4i32_v4i32__6_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -8528,14 +8129,14 @@ define void @v_shuffle_v4i32_v4i32__7_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_5_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: v_mov_b32_e32 v6, v1
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -8577,14 +8178,13 @@ define void @v_shuffle_v4i32_v4i32__7_u_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_u_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: v_mov_b32_e32 v6, v1
+; GFX900-NEXT: global_store_dwordx4 v4, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -8592,13 +8192,12 @@ define void @v_shuffle_v4i32_v4i32__7_u_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v1, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx4 v1, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -8606,13 +8205,12 @@ define void @v_shuffle_v4i32_v4i32__7_u_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v3
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v1, 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx4 v1, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -8627,17 +8225,15 @@ define void @v_shuffle_v4i32_v4i32__7_0_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v2
-; GFX900-NEXT: v_mov_b32_e32 v6, v2
-; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: v_mov_b32_e32 v6, v1
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -8685,15 +8281,15 @@ define void @v_shuffle_v4i32_v4i32__7_1_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: v_mov_b32_e32 v6, v1
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -8741,17 +8337,15 @@ define void @v_shuffle_v4i32_v4i32__7_2_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: v_mov_b32_e32 v6, v1
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -8759,15 +8353,14 @@ define void @v_shuffle_v4i32_v4i32__7_2_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[2:3] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[6:7] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
@@ -8776,16 +8369,15 @@ define void @v_shuffle_v4i32_v4i32__7_2_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[2:3] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[6:7] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -8801,17 +8393,15 @@ define void @v_shuffle_v4i32_v4i32__7_3_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: v_mov_b32_e32 v6, v1
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -8821,15 +8411,13 @@ define void @v_shuffle_v4i32_v4i32__7_3_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v7
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -8839,15 +8427,13 @@ define void @v_shuffle_v4i32_v4i32__7_3_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v7
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -8861,15 +8447,14 @@ define void @v_shuffle_v4i32_v4i32__7_4_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_4_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v1
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -8877,13 +8462,12 @@ define void @v_shuffle_v4i32_v4i32__7_4_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v4, v1
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[2:3] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -8891,13 +8475,12 @@ define void @v_shuffle_v4i32_v4i32__7_4_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v4, v1
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[2:3] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -8926,13 +8509,12 @@ define void @v_shuffle_v4i32_v4i32__7_6_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[2:3] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v4, v1
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[4:5] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -8940,13 +8522,12 @@ define void @v_shuffle_v4i32_v4i32__7_6_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[2:3] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v4, v1
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[4:5] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -8960,45 +8541,42 @@ define void @v_shuffle_v4i32_v4i32__7_7_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_7_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v1
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v4i32__7_7_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v3
-; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: v_mov_b32_e32 v8, v1
-; GFX90A-NEXT: v_mov_b32_e32 v9, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v3
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: v_mov_b32_e32 v8, v1
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -9012,13 +8590,13 @@ define void @v_shuffle_v4i32_v4i32__7_7_u_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_7_u_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v1
+; GFX900-NEXT: global_store_dwordx4 v5, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -9059,16 +8637,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_0_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: v_mov_b32_e32 v5, v0
-; GFX900-NEXT: v_mov_b32_e32 v6, v2
-; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v1
+; GFX900-NEXT: global_store_dwordx4 v9, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -9076,17 +8653,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_0_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, v5
-; GFX90A-NEXT: v_mov_b32_e32 v9, v5
-; GFX90A-NEXT: v_mov_b32_e32 v10, v0
-; GFX90A-NEXT: v_mov_b32_e32 v11, v3
-; GFX90A-NEXT: global_store_dwordx4 v6, v[8:11], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -9094,17 +8669,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_0_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, v0
-; GFX942-NEXT: v_mov_b32_e32 v8, v5
-; GFX942-NEXT: v_mov_b32_e32 v9, v5
-; GFX942-NEXT: v_mov_b32_e32 v11, v3
-; GFX942-NEXT: global_store_dwordx4 v6, v[8:11], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -9119,16 +8692,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_1_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v5
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
; GFX900-NEXT: v_mov_b32_e32 v6, v1
-; GFX900-NEXT: v_mov_b32_e32 v7, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -9136,17 +8708,16 @@ define void @v_shuffle_v4i32_v4i32__7_7_1_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, v5
-; GFX90A-NEXT: v_mov_b32_e32 v9, v5
-; GFX90A-NEXT: v_mov_b32_e32 v10, v1
-; GFX90A-NEXT: v_mov_b32_e32 v11, v3
-; GFX90A-NEXT: global_store_dwordx4 v6, v[8:11], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -9154,17 +8725,16 @@ define void @v_shuffle_v4i32_v4i32__7_7_1_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, v1
-; GFX942-NEXT: v_mov_b32_e32 v8, v5
-; GFX942-NEXT: v_mov_b32_e32 v9, v5
-; GFX942-NEXT: v_mov_b32_e32 v11, v3
-; GFX942-NEXT: global_store_dwordx4 v6, v[8:11], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -9179,16 +8749,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_2_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v1
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -9196,16 +8765,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_2_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -9213,17 +8781,16 @@ define void @v_shuffle_v4i32_v4i32__7_7_2_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -9238,17 +8805,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_3_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v1
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -9256,17 +8821,16 @@ define void @v_shuffle_v4i32_v4i32__7_7_3_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -9274,17 +8838,16 @@ define void @v_shuffle_v4i32_v4i32__7_7_3_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -9298,45 +8861,42 @@ define void @v_shuffle_v4i32_v4i32__7_7_4_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_7_4_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: v_mov_b32_e32 v6, v1
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v4i32__7_7_4_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v3
-; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_4_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v3
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -9350,46 +8910,42 @@ define void @v_shuffle_v4i32_v4i32__7_7_6_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_7_6_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v4, v3
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: v_mov_b32_e32 v6, v2
-; GFX900-NEXT: v_mov_b32_e32 v7, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: v_mov_b32_e32 v6, v1
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v4i32__7_7_6_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v3
-; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: v_mov_b32_e32 v8, v2
-; GFX90A-NEXT: v_mov_b32_e32 v9, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: v_mov_b32_e32 v1, v5
+; GFX90A-NEXT: v_mov_b32_e32 v2, v4
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_6_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v3
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: v_mov_b32_e32 v8, v2
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: v_mov_b32_e32 v1, v5
+; GFX942-NEXT: v_mov_b32_e32 v2, v4
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -9406,10 +8962,10 @@ define void @v_shuffle_v4i32_v4i32__u_6_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -9514,10 +9070,9 @@ define void @v_shuffle_v4i32_v4i32__1_6_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: v_mov_b32_e32 v2, v4
; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: v_mov_b32_e32 v5, v4
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -9571,10 +9126,9 @@ define void @v_shuffle_v4i32_v4i32__2_6_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v3, v5
; GFX900-NEXT: v_mov_b32_e32 v4, v5
-; GFX900-NEXT: v_mov_b32_e32 v6, v5
-; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -9582,16 +9136,16 @@ define void @v_shuffle_v4i32_v4i32__2_6_6_6(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v6
+; GFX90A-NEXT: v_mov_b32_e32 v4, v6
; GFX90A-NEXT: v_mov_b32_e32 v5, v6
-; GFX90A-NEXT: v_mov_b32_e32 v7, v6
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -9599,16 +9153,16 @@ define void @v_shuffle_v4i32_v4i32__2_6_6_6(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v6
+; GFX942-NEXT: v_mov_b32_e32 v4, v6
; GFX942-NEXT: v_mov_b32_e32 v5, v6
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v6
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -9626,13 +9180,12 @@ define void @v_shuffle_v4i32_v4i32__3_6_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v6
+; GFX900-NEXT: v_mov_b32_e32 v5, v6
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v3
-; GFX900-NEXT: v_mov_b32_e32 v5, v6
-; GFX900-NEXT: v_mov_b32_e32 v7, v6
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -9728,11 +9281,10 @@ define void @v_shuffle_v4i32_v4i32__5_6_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -9775,11 +9327,11 @@ define void @v_shuffle_v4i32_v4i32__6_6_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -9789,11 +9341,11 @@ define void @v_shuffle_v4i32_v4i32__6_6_6_6(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -9803,11 +9355,11 @@ define void @v_shuffle_v4i32_v4i32__6_6_6_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -9821,14 +9373,14 @@ define void @v_shuffle_v4i32_v4i32__7_6_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_6_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -9868,13 +9420,13 @@ define void @v_shuffle_v4i32_v4i32__7_u_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_u_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: global_store_dwordx4 v4, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -9915,16 +9467,15 @@ define void @v_shuffle_v4i32_v4i32__7_0_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v4, v3
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -9972,16 +9523,15 @@ define void @v_shuffle_v4i32_v4i32__7_1_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -10031,15 +9581,14 @@ define void @v_shuffle_v4i32_v4i32__7_2_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v6
-; GFX900-NEXT: v_mov_b32_e32 v4, v2
-; GFX900-NEXT: v_mov_b32_e32 v6, v5
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
@@ -10088,16 +9637,15 @@ define void @v_shuffle_v4i32_v4i32__7_3_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v7
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: v_mov_b32_e32 v7, v6
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -10105,16 +9653,16 @@ define void @v_shuffle_v4i32_v4i32__7_3_6_6(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v7
-; GFX90A-NEXT: v_mov_b32_e32 v5, v3
-; GFX90A-NEXT: v_mov_b32_e32 v7, v6
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v7
+; GFX90A-NEXT: v_mov_b32_e32 v4, v6
+; GFX90A-NEXT: v_mov_b32_e32 v5, v6
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -10122,16 +9670,16 @@ define void @v_shuffle_v4i32_v4i32__7_3_6_6(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, v7
-; GFX942-NEXT: v_mov_b32_e32 v5, v3
-; GFX942-NEXT: v_mov_b32_e32 v7, v6
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v7
+; GFX942-NEXT: v_mov_b32_e32 v4, v6
+; GFX942-NEXT: v_mov_b32_e32 v5, v6
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -10192,13 +9740,14 @@ define void @v_shuffle_v4i32_v4i32__7_5_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_5_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -10238,14 +9787,14 @@ define void @v_shuffle_v4i32_v4i32__7_7_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_7_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -10287,14 +9836,13 @@ define void @v_shuffle_v4i32_v4i32__7_7_u_6(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_7_u_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: global_store_dwordx4 v5, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -10337,17 +9885,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_0_6(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, v4
-; GFX900-NEXT: v_mov_b32_e32 v6, v4
-; GFX900-NEXT: v_mov_b32_e32 v7, v0
-; GFX900-NEXT: v_mov_b32_e32 v8, v3
-; GFX900-NEXT: global_store_dwordx4 v9, v[5:8], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: global_store_dwordx4 v9, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -10355,17 +9901,16 @@ define void @v_shuffle_v4i32_v4i32__7_7_0_6(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, v5
-; GFX90A-NEXT: v_mov_b32_e32 v9, v5
-; GFX90A-NEXT: v_mov_b32_e32 v10, v0
-; GFX90A-NEXT: v_mov_b32_e32 v11, v4
-; GFX90A-NEXT: global_store_dwordx4 v6, v[8:11], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v7
+; GFX90A-NEXT: v_mov_b32_e32 v1, v7
+; GFX90A-NEXT: v_mov_b32_e32 v3, v6
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -10373,17 +9918,17 @@ define void @v_shuffle_v4i32_v4i32__7_7_0_6(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, v0
-; GFX942-NEXT: v_mov_b32_e32 v8, v5
-; GFX942-NEXT: v_mov_b32_e32 v9, v5
-; GFX942-NEXT: v_mov_b32_e32 v11, v4
-; GFX942-NEXT: global_store_dwordx4 v6, v[8:11], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v7
+; GFX942-NEXT: v_mov_b32_e32 v1, v7
+; GFX942-NEXT: v_mov_b32_e32 v3, v6
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -10398,17 +9943,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_1_6(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, v5
-; GFX900-NEXT: v_mov_b32_e32 v7, v5
-; GFX900-NEXT: v_mov_b32_e32 v8, v1
-; GFX900-NEXT: v_mov_b32_e32 v9, v4
-; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -10416,16 +9959,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_1_6(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[4:5] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v0, v5
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[4:5], v[2:3] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -10433,17 +9975,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_1_6(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[4:5] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v0, v5
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[4:5], v[2:3] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -10458,16 +9998,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_2_6(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -10517,17 +10056,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_3_6(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v6
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -10535,16 +10072,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_3_6(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[6:7] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[4:5], v[2:3] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -10552,17 +10088,16 @@ define void @v_shuffle_v4i32_v4i32__7_7_3_6(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[6:7] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[4:5], v[2:3] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -10576,46 +10111,42 @@ define void @v_shuffle_v4i32_v4i32__7_7_4_6(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_7_4_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v4, v3
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: v_mov_b32_e32 v6, v0
-; GFX900-NEXT: v_mov_b32_e32 v7, v2
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v4i32__7_7_4_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v3
-; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v2
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: v_mov_b32_e32 v1, v5
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_4_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v3
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v9, v2
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: v_mov_b32_e32 v1, v5
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -10629,15 +10160,14 @@ define void @v_shuffle_v4i32_v4i32__7_7_5_6(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_7_5_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v4, v3
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: v_mov_b32_e32 v6, v1
-; GFX900-NEXT: v_mov_b32_e32 v7, v2
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -10677,39 +10207,40 @@ define void @v_shuffle_v4i32_v4i32__u_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__u_7_7_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v4i32__u_7_7_7:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v4i32__u_7_7_7:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -10789,10 +10320,10 @@ define void @v_shuffle_v4i32_v4i32__1_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: v_mov_b32_e32 v2, v5
; GFX900-NEXT: v_mov_b32_e32 v3, v5
; GFX900-NEXT: v_mov_b32_e32 v4, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -10800,16 +10331,16 @@ define void @v_shuffle_v4i32_v4i32__1_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v5
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -10817,17 +10348,16 @@ define void @v_shuffle_v4i32_v4i32__1_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v5
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -10848,10 +10378,10 @@ define void @v_shuffle_v4i32_v4i32__2_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v3, v6
; GFX900-NEXT: v_mov_b32_e32 v4, v6
; GFX900-NEXT: v_mov_b32_e32 v5, v6
-; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -10859,16 +10389,16 @@ define void @v_shuffle_v4i32_v4i32__2_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v7
+; GFX90A-NEXT: v_mov_b32_e32 v4, v7
; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: v_mov_b32_e32 v6, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -10876,16 +10406,16 @@ define void @v_shuffle_v4i32_v4i32__2_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v7
+; GFX942-NEXT: v_mov_b32_e32 v4, v7
; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v6, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -10903,13 +10433,13 @@ define void @v_shuffle_v4i32_v4i32__3_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v7
+; GFX900-NEXT: v_mov_b32_e32 v5, v7
+; GFX900-NEXT: v_mov_b32_e32 v6, v7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v3
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: v_mov_b32_e32 v6, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -10917,16 +10447,16 @@ define void @v_shuffle_v4i32_v4i32__3_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v5
; GFX90A-NEXT: v_mov_b32_e32 v4, v3
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: v_mov_b32_e32 v6, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -10934,16 +10464,17 @@ define void @v_shuffle_v4i32_v4i32__3_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v5
; GFX942-NEXT: v_mov_b32_e32 v4, v3
-; GFX942-NEXT: v_mov_b32_e32 v6, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -11006,11 +10537,10 @@ define void @v_shuffle_v4i32_v4i32__5_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -11020,11 +10550,11 @@ define void @v_shuffle_v4i32_v4i32__5_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -11034,11 +10564,11 @@ define void @v_shuffle_v4i32_v4i32__5_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -11052,42 +10582,40 @@ define void @v_shuffle_v4i32_v4i32__6_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__6_7_7_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v4i32__6_7_7_7:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v4i32__6_7_7_7:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -11101,13 +10629,13 @@ define void @v_shuffle_v4i32_v4i32__7_u_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_u_7_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v3
+; GFX900-NEXT: global_store_dwordx4 v4, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -11148,16 +10676,15 @@ define void @v_shuffle_v4i32_v4i32__7_0_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -11205,16 +10732,15 @@ define void @v_shuffle_v4i32_v4i32__7_1_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v3
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -11264,15 +10790,14 @@ define void @v_shuffle_v4i32_v4i32__7_2_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v6
-; GFX900-NEXT: v_mov_b32_e32 v4, v2
-; GFX900-NEXT: v_mov_b32_e32 v5, v6
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v3
; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
@@ -11321,16 +10846,15 @@ define void @v_shuffle_v4i32_v4i32__7_3_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v7
; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: v_mov_b32_e32 v6, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v3
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -11338,16 +10862,16 @@ define void @v_shuffle_v4i32_v4i32__7_3_7_7(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v7
; GFX90A-NEXT: v_mov_b32_e32 v4, v7
-; GFX90A-NEXT: v_mov_b32_e32 v5, v3
-; GFX90A-NEXT: v_mov_b32_e32 v6, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v5, v7
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -11355,16 +10879,16 @@ define void @v_shuffle_v4i32_v4i32__7_3_7_7(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v7
; GFX942-NEXT: v_mov_b32_e32 v4, v7
-; GFX942-NEXT: v_mov_b32_e32 v5, v3
-; GFX942-NEXT: v_mov_b32_e32 v6, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, v7
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -11378,15 +10902,14 @@ define void @v_shuffle_v4i32_v4i32__7_4_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_4_7_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v3
-; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
; GFX900-NEXT: v_mov_b32_e32 v6, v3
-; GFX900-NEXT: v_mov_b32_e32 v7, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -11426,13 +10949,14 @@ define void @v_shuffle_v4i32_v4i32__7_5_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_5_7_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v3
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -11472,14 +10996,14 @@ define void @v_shuffle_v4i32_v4i32__7_6_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_6_7_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v3
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -11519,13 +11043,13 @@ define void @v_shuffle_v4i32_v4i32__7_7_u_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_7_u_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v3
+; GFX900-NEXT: global_store_dwordx4 v5, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -11536,9 +11060,9 @@ define void @v_shuffle_v4i32_v4i32__7_7_u_7(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v3
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -11549,9 +11073,9 @@ define void @v_shuffle_v4i32_v4i32__7_7_u_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v3
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -11566,16 +11090,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_0_7(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v3
+; GFX900-NEXT: global_store_dwordx4 v9, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -11583,16 +11106,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_0_7(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -11600,17 +11122,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_0_7(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -11625,16 +11145,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_1_7(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -11642,16 +11161,16 @@ define void @v_shuffle_v4i32_v4i32__7_7_1_7(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -11659,17 +11178,16 @@ define void @v_shuffle_v4i32_v4i32__7_7_1_7(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -11684,16 +11202,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_2_7(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v6
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v3
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -11701,16 +11218,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_2_7(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -11718,17 +11234,16 @@ define void @v_shuffle_v4i32_v4i32__7_7_2_7(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -11743,16 +11258,15 @@ define void @v_shuffle_v4i32_v4i32__7_7_3_7(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v7
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
; GFX900-NEXT: v_mov_b32_e32 v6, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -11760,16 +11274,16 @@ define void @v_shuffle_v4i32_v4i32__7_7_3_7(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v7
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: v_mov_b32_e32 v6, v3
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -11777,16 +11291,16 @@ define void @v_shuffle_v4i32_v4i32__7_7_3_7(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, v7
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: v_mov_b32_e32 v6, v3
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -11800,46 +11314,42 @@ define void @v_shuffle_v4i32_v4i32__7_7_4_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_7_4_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v4, v3
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: v_mov_b32_e32 v6, v0
-; GFX900-NEXT: v_mov_b32_e32 v7, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: v_mov_b32_e32 v6, v3
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v4i32__7_7_4_7:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v3
-; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_4_7:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v3
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v9, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -11853,46 +11363,42 @@ define void @v_shuffle_v4i32_v4i32__7_7_5_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_7_5_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v4, v3
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: v_mov_b32_e32 v6, v1
-; GFX900-NEXT: v_mov_b32_e32 v7, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: v_mov_b32_e32 v6, v3
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i32_v4i32__7_7_5_7:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v3
-; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: v_mov_b32_e32 v8, v1
-; GFX90A-NEXT: v_mov_b32_e32 v9, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i32_v4i32__7_7_5_7:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v3
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: v_mov_b32_e32 v8, v1
-; GFX942-NEXT: v_mov_b32_e32 v9, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i32> asm "; def $0", "=v"()
@@ -11906,13 +11412,14 @@ define void @v_shuffle_v4i32_v4i32__7_7_6_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i32_v4i32__7_7_6_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: v_mov_b32_e32 v6, v3
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v2i64.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v2i64.ll
index 21ec9acf6317d..79dde44bcbdec 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v2i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v2i64.ll
@@ -58,39 +58,33 @@ define void @v_shuffle_v4i64_v2i64__1_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v2i64__1_u_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v2i64__1_u_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v2i64__1_u_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -114,39 +108,33 @@ define void @v_shuffle_v4i64_v2i64__3_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v2i64__3_u_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v2i64__3_u_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_u_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -160,55 +148,42 @@ define void @v_shuffle_v4i64_v2i64__3_0_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v2i64__3_0_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v2i64__3_0_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_0_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -222,49 +197,43 @@ define void @v_shuffle_v4i64_v2i64__3_1_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v2i64__3_1_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v2i64__3_1_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_1_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -291,31 +260,27 @@ define void @v_shuffle_v4i64_v2i64__3_2_u_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-LABEL: v_shuffle_v4i64_v2i64__3_2_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_2_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -329,39 +294,40 @@ define void @v_shuffle_v4i64_v2i64__3_3_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v2i64__3_3_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v2i64__3_3_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_3_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -375,54 +341,54 @@ define void @v_shuffle_v4i64_v2i64__3_3_0_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v2i64__3_3_0_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v2i64__3_3_0_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_3_0_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -436,57 +402,51 @@ define void @v_shuffle_v4i64_v2i64__3_3_1_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v2i64__3_3_1_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v2i64__3_3_1_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_3_1_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -500,45 +460,42 @@ define void @v_shuffle_v4i64_v2i64__3_3_2_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v2i64__3_3_2_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v2i64__3_3_2_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_3_2_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -552,44 +509,47 @@ define void @v_shuffle_v4i64_v2i64__3_3_3_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v2i64__3_3_3_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v2i64__3_3_3_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_3_3_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
-; GFX942-NEXT: s_waitcnt vmcnt(0)
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: s_waitcnt vmcnt(0)
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
%vec1 = call <2 x i64> asm "; def $0", "=v"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 3, i32 3, i32 poison>
@@ -601,63 +561,54 @@ define void @v_shuffle_v4i64_v2i64__3_3_3_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v2i64__3_3_3_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v4
-; GFX900-NEXT: v_mov_b32_e32 v7, v5
-; GFX900-NEXT: v_mov_b32_e32 v8, v0
-; GFX900-NEXT: v_mov_b32_e32 v9, v1
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v2i64__3_3_3_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v4
-; GFX90A-NEXT: v_mov_b32_e32 v9, v5
-; GFX90A-NEXT: v_mov_b32_e32 v10, v0
-; GFX90A-NEXT: v_mov_b32_e32 v11, v1
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[8:11], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_3_3_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, v0
-; GFX942-NEXT: v_mov_b32_e32 v8, v4
-; GFX942-NEXT: v_mov_b32_e32 v9, v5
-; GFX942-NEXT: v_mov_b32_e32 v11, v1
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[8:11], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -671,57 +622,55 @@ define void @v_shuffle_v4i64_v2i64__3_3_3_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v2i64__3_3_3_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v2i64__3_3_3_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_3_3_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -735,54 +684,52 @@ define void @v_shuffle_v4i64_v2i64__3_3_3_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v2i64__3_3_3_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
; GFX900-NEXT: v_mov_b32_e32 v4, v2
; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: v_mov_b32_e32 v6, v0
-; GFX900-NEXT: v_mov_b32_e32 v7, v1
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v2i64__3_3_3_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v1
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_3_3_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -796,42 +743,43 @@ define void @v_shuffle_v4i64_v2i64__3_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v2i64__3_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v2i64__3_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -846,13 +794,13 @@ define void @v_shuffle_v4i64_v2i64__u_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v0, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -860,13 +808,13 @@ define void @v_shuffle_v4i64_v2i64__u_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v0, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -874,13 +822,13 @@ define void @v_shuffle_v4i64_v2i64__u_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v0, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v0, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -943,53 +891,47 @@ define void @v_shuffle_v4i64_v2i64__1_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: v_mov_b32_e32 v6, v0
-; GFX900-NEXT: v_mov_b32_e32 v7, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, v2
+; GFX900-NEXT: v_mov_b32_e32 v1, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: v_mov_b32_e32 v7, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v2i64__1_0_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: v_mov_b32_e32 v7, v1
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v2
+; GFX90A-NEXT: v_mov_b32_e32 v1, v3
; GFX90A-NEXT: v_mov_b32_e32 v6, v2
; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v2i64__1_0_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v0, v2
+; GFX942-NEXT: v_mov_b32_e32 v1, v3
; GFX942-NEXT: v_mov_b32_e32 v6, v2
; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -1003,13 +945,13 @@ define void @v_shuffle_v4i64_v2i64__2_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v0, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1017,13 +959,13 @@ define void @v_shuffle_v4i64_v2i64__2_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v0, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1031,13 +973,13 @@ define void @v_shuffle_v4i64_v2i64__2_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v0, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v0, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -1051,21 +993,16 @@ define void @v_shuffle_v4i64_v2i64__3_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1073,21 +1010,16 @@ define void @v_shuffle_v4i64_v2i64__3_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1095,22 +1027,16 @@ define void @v_shuffle_v4i64_v2i64__3_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -1125,19 +1051,16 @@ define void @v_shuffle_v4i64_v2i64__3_u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1145,19 +1068,16 @@ define void @v_shuffle_v4i64_v2i64__3_u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1165,20 +1085,16 @@ define void @v_shuffle_v4i64_v2i64__3_u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -1192,63 +1108,58 @@ define void @v_shuffle_v4i64_v2i64__3_1_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v2i64__3_1_0_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v0, v2
+; GFX900-NEXT: v_mov_b32_e32 v1, v3
+; GFX900-NEXT: v_mov_b32_e32 v10, v4
+; GFX900-NEXT: v_mov_b32_e32 v11, v5
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[6:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, v0
-; GFX900-NEXT: v_mov_b32_e32 v9, v1
-; GFX900-NEXT: v_mov_b32_e32 v10, v0
-; GFX900-NEXT: v_mov_b32_e32 v11, v1
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v2i64__3_1_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v2
+; GFX90A-NEXT: v_mov_b32_e32 v1, v3
+; GFX90A-NEXT: v_mov_b32_e32 v10, v4
+; GFX90A-NEXT: v_mov_b32_e32 v11, v5
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[6:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, v0
-; GFX90A-NEXT: v_mov_b32_e32 v11, v1
-; GFX90A-NEXT: v_mov_b32_e32 v12, v0
-; GFX90A-NEXT: v_mov_b32_e32 v13, v1
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[10:13], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_1_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[6:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, v0
-; GFX942-NEXT: v_mov_b32_e32 v11, v1
-; GFX942-NEXT: v_mov_b32_e32 v12, v0
-; GFX942-NEXT: v_mov_b32_e32 v13, v1
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[10:13], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v2
+; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: v_mov_b32_e32 v10, v4
+; GFX942-NEXT: v_mov_b32_e32 v11, v5
+; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -1263,21 +1174,19 @@ define void @v_shuffle_v4i64_v2i64__3_2_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1285,21 +1194,19 @@ define void @v_shuffle_v4i64_v2i64__3_2_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1307,21 +1214,19 @@ define void @v_shuffle_v4i64_v2i64__3_2_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -1336,19 +1241,19 @@ define void @v_shuffle_v4i64_v2i64__3_3_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1356,19 +1261,19 @@ define void @v_shuffle_v4i64_v2i64__3_3_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1376,20 +1281,19 @@ define void @v_shuffle_v4i64_v2i64__3_3_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -1404,19 +1308,16 @@ define void @v_shuffle_v4i64_v2i64__3_3_u_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v0, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1424,19 +1325,16 @@ define void @v_shuffle_v4i64_v2i64__3_3_u_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v0, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1444,20 +1342,16 @@ define void @v_shuffle_v4i64_v2i64__3_3_u_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v0, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v0, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -1474,17 +1368,16 @@ define void @v_shuffle_v4i64_v2i64__3_3_1_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v4, v0
; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1494,17 +1387,16 @@ define void @v_shuffle_v4i64_v2i64__3_3_1_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v4, v0
; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1514,18 +1406,16 @@ define void @v_shuffle_v4i64_v2i64__3_3_1_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
; GFX942-NEXT: v_mov_b32_e32 v4, v0
; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -1540,20 +1430,18 @@ define void @v_shuffle_v4i64_v2i64__3_3_2_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v2
-; GFX900-NEXT: v_mov_b32_e32 v7, v3
-; GFX900-NEXT: v_mov_b32_e32 v8, v0
-; GFX900-NEXT: v_mov_b32_e32 v9, v1
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v0, v4
+; GFX900-NEXT: v_mov_b32_e32 v1, v5
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1561,20 +1449,18 @@ define void @v_shuffle_v4i64_v2i64__3_3_2_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v2
-; GFX90A-NEXT: v_mov_b32_e32 v9, v3
-; GFX90A-NEXT: v_mov_b32_e32 v10, v0
-; GFX90A-NEXT: v_mov_b32_e32 v11, v1
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[8:11], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v4
+; GFX90A-NEXT: v_mov_b32_e32 v1, v5
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1582,20 +1468,19 @@ define void @v_shuffle_v4i64_v2i64__3_3_2_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, v0
-; GFX942-NEXT: v_mov_b32_e32 v8, v2
-; GFX942-NEXT: v_mov_b32_e32 v9, v3
-; GFX942-NEXT: v_mov_b32_e32 v11, v1
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[8:11], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v4
+; GFX942-NEXT: v_mov_b32_e32 v1, v5
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -1609,42 +1494,43 @@ define void @v_shuffle_v4i64_v2i64__u_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v2i64__u_1_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v2i64__u_1_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v2i64__u_1_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -1657,49 +1543,43 @@ define void @v_shuffle_v4i64_v2i64__0_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v2i64__0_1_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v4, v2
; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: v_mov_b32_e32 v6, v2
-; GFX900-NEXT: v_mov_b32_e32 v7, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v2i64__0_1_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: v_mov_b32_e32 v8, v2
-; GFX90A-NEXT: v_mov_b32_e32 v9, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v2i64__0_1_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: v_mov_b32_e32 v8, v2
-; GFX942-NEXT: v_mov_b32_e32 v9, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -1712,42 +1592,43 @@ define void @v_shuffle_v4i64_v2i64__1_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v2i64__1_1_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v2i64__1_1_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v2i64__1_1_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -1760,42 +1641,43 @@ define void @v_shuffle_v4i64_v2i64__2_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v2i64__2_1_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v2i64__2_1_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v2i64__2_1_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -1809,19 +1691,16 @@ define void @v_shuffle_v4i64_v2i64__3_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1829,19 +1708,16 @@ define void @v_shuffle_v4i64_v2i64__3_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1849,19 +1725,16 @@ define void @v_shuffle_v4i64_v2i64__3_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -1876,19 +1749,16 @@ define void @v_shuffle_v4i64_v2i64__3_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1896,19 +1766,16 @@ define void @v_shuffle_v4i64_v2i64__3_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1916,19 +1783,17 @@ define void @v_shuffle_v4i64_v2i64__3_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -1942,69 +1807,52 @@ define void @v_shuffle_v4i64_v2i64__3_0_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v2i64__3_0_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v8, v2
-; GFX900-NEXT: v_mov_b32_e32 v9, v3
-; GFX900-NEXT: v_mov_b32_e32 v10, v2
-; GFX900-NEXT: v_mov_b32_e32 v11, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v2i64__3_0_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v10, v2
-; GFX90A-NEXT: v_mov_b32_e32 v11, v3
-; GFX90A-NEXT: v_mov_b32_e32 v12, v2
-; GFX90A-NEXT: v_mov_b32_e32 v13, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[10:13], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_0_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v2
-; GFX942-NEXT: v_mov_b32_e32 v11, v3
-; GFX942-NEXT: v_mov_b32_e32 v12, v2
-; GFX942-NEXT: v_mov_b32_e32 v13, v3
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[10:13], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -2018,66 +1866,58 @@ define void @v_shuffle_v4i64_v2i64__3_2_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v2i64__3_2_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[6:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: v_mov_b32_e32 v10, v6
+; GFX900-NEXT: v_mov_b32_e32 v11, v7
+; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v2i64__3_2_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[6:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: v_mov_b32_e32 v10, v6
+; GFX90A-NEXT: v_mov_b32_e32 v11, v7
+; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_2_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[6:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: v_mov_b32_e32 v10, v6
+; GFX942-NEXT: v_mov_b32_e32 v11, v7
+; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -2092,18 +1932,18 @@ define void @v_shuffle_v4i64_v2i64__3_3_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2111,18 +1951,18 @@ define void @v_shuffle_v4i64_v2i64__3_3_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2130,18 +1970,18 @@ define void @v_shuffle_v4i64_v2i64__3_3_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -2155,51 +1995,55 @@ define void @v_shuffle_v4i64_v2i64__3_3_u_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v2i64__3_3_u_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v2i64__3_3_u_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_3_u_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -2213,51 +2057,54 @@ define void @v_shuffle_v4i64_v2i64__3_3_0_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v2i64__3_3_0_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v2i64__3_3_0_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_3_0_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -2272,18 +2119,19 @@ define void @v_shuffle_v4i64_v2i64__3_3_2_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2291,18 +2139,19 @@ define void @v_shuffle_v4i64_v2i64__3_3_2_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2310,18 +2159,20 @@ define void @v_shuffle_v4i64_v2i64__3_3_2_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -2385,39 +2236,33 @@ define void @v_shuffle_v4i64_v2i64__1_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v2i64__1_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v2i64__1_2_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v2i64__1_2_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -2443,53 +2288,47 @@ define void @v_shuffle_v4i64_v2i64__3_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: v_mov_b32_e32 v6, v0
-; GFX900-NEXT: v_mov_b32_e32 v7, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, v2
+; GFX900-NEXT: v_mov_b32_e32 v1, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: v_mov_b32_e32 v7, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v2i64__3_2_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: v_mov_b32_e32 v7, v1
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v2
+; GFX90A-NEXT: v_mov_b32_e32 v1, v3
; GFX90A-NEXT: v_mov_b32_e32 v6, v2
; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_2_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v0, v2
+; GFX942-NEXT: v_mov_b32_e32 v1, v3
; GFX942-NEXT: v_mov_b32_e32 v6, v2
; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -2503,54 +2342,43 @@ define void @v_shuffle_v4i64_v2i64__3_u_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v2i64__3_u_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: v_mov_b32_e32 v6, v0
-; GFX900-NEXT: v_mov_b32_e32 v7, v1
; GFX900-NEXT: v_mov_b32_e32 v0, v2
; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v2i64__3_u_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: v_mov_b32_e32 v7, v1
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v1
; GFX90A-NEXT: v_mov_b32_e32 v0, v2
; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_u_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_mov_b32_e32 v0, v2
; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v6, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -2564,70 +2392,52 @@ define void @v_shuffle_v4i64_v2i64__3_0_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v2i64__3_0_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v0, v2
+; GFX900-NEXT: v_mov_b32_e32 v1, v3
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[6:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v2
-; GFX900-NEXT: v_mov_b32_e32 v7, v3
-; GFX900-NEXT: v_mov_b32_e32 v8, v2
-; GFX900-NEXT: v_mov_b32_e32 v9, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v2i64__3_0_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v0, v2
+; GFX90A-NEXT: v_mov_b32_e32 v1, v3
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[6:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v2
-; GFX90A-NEXT: v_mov_b32_e32 v9, v3
-; GFX90A-NEXT: v_mov_b32_e32 v10, v2
-; GFX90A-NEXT: v_mov_b32_e32 v11, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[8:11], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_0_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[6:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v2
-; GFX942-NEXT: v_mov_b32_e32 v9, v3
-; GFX942-NEXT: v_mov_b32_e32 v10, v2
-; GFX942-NEXT: v_mov_b32_e32 v11, v3
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[8:11], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v2
+; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -2642,20 +2452,16 @@ define void @v_shuffle_v4i64_v2i64__3_1_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, v4
-; GFX900-NEXT: v_mov_b32_e32 v9, v5
-; GFX900-NEXT: v_mov_b32_e32 v10, v4
-; GFX900-NEXT: v_mov_b32_e32 v11, v5
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, v2
+; GFX900-NEXT: v_mov_b32_e32 v1, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2663,20 +2469,16 @@ define void @v_shuffle_v4i64_v2i64__3_1_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, v4
-; GFX90A-NEXT: v_mov_b32_e32 v11, v5
-; GFX90A-NEXT: v_mov_b32_e32 v12, v4
-; GFX90A-NEXT: v_mov_b32_e32 v13, v5
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[10:13], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v2
+; GFX90A-NEXT: v_mov_b32_e32 v1, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2684,21 +2486,17 @@ define void @v_shuffle_v4i64_v2i64__3_1_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v4
-; GFX942-NEXT: v_mov_b32_e32 v11, v5
-; GFX942-NEXT: v_mov_b32_e32 v12, v4
-; GFX942-NEXT: v_mov_b32_e32 v13, v5
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[10:13], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v2
+; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -2712,54 +2510,49 @@ define void @v_shuffle_v4i64_v2i64__3_3_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v2i64__3_3_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: v_mov_b32_e32 v6, v0
-; GFX900-NEXT: v_mov_b32_e32 v7, v1
; GFX900-NEXT: v_mov_b32_e32 v0, v2
; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v2i64__3_3_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: v_mov_b32_e32 v7, v1
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v1
; GFX90A-NEXT: v_mov_b32_e32 v0, v2
; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_3_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_mov_b32_e32 v0, v2
; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -2773,48 +2566,42 @@ define void @v_shuffle_v4i64_v2i64__3_3_u_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v2i64__3_3_u_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v0, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v2i64__3_3_u_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: v_mov_b32_e32 v7, v1
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v0, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_3_u_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v0, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v0, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -2831,15 +2618,14 @@ define void @v_shuffle_v4i64_v2i64__3_3_0_2(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2849,15 +2635,14 @@ define void @v_shuffle_v4i64_v2i64__3_3_0_2(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2867,15 +2652,14 @@ define void @v_shuffle_v4i64_v2i64__3_3_0_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -2889,63 +2673,51 @@ define void @v_shuffle_v4i64_v2i64__3_3_1_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v2i64__3_3_1_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v2i64__3_3_1_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_3_1_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -2959,42 +2731,43 @@ define void @v_shuffle_v4i64_v2i64__u_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v2i64__u_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v2i64__u_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v2i64__u_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -3009,16 +2782,18 @@ define void @v_shuffle_v4i64_v2i64__0_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[6:9]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: v_mov_b32_e32 v8, v2
+; GFX900-NEXT: v_mov_b32_e32 v9, v3
+; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3026,16 +2801,18 @@ define void @v_shuffle_v4i64_v2i64__0_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[6:9]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: v_mov_b32_e32 v8, v2
+; GFX90A-NEXT: v_mov_b32_e32 v9, v3
+; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3043,17 +2820,19 @@ define void @v_shuffle_v4i64_v2i64__0_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[6:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: v_mov_b32_e32 v8, v2
+; GFX942-NEXT: v_mov_b32_e32 v9, v3
+; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -3070,17 +2849,16 @@ define void @v_shuffle_v4i64_v2i64__1_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
; GFX900-NEXT: v_mov_b32_e32 v4, v6
; GFX900-NEXT: v_mov_b32_e32 v5, v7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v2
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3090,17 +2868,16 @@ define void @v_shuffle_v4i64_v2i64__1_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
; GFX90A-NEXT: v_mov_b32_e32 v4, v6
; GFX90A-NEXT: v_mov_b32_e32 v5, v7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v3
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3110,17 +2887,16 @@ define void @v_shuffle_v4i64_v2i64__1_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
; GFX942-NEXT: v_mov_b32_e32 v4, v6
; GFX942-NEXT: v_mov_b32_e32 v5, v7
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v3
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -3134,49 +2910,43 @@ define void @v_shuffle_v4i64_v2i64__2_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v2i64__2_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v4, v2
; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: v_mov_b32_e32 v6, v2
-; GFX900-NEXT: v_mov_b32_e32 v7, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v2i64__2_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: v_mov_b32_e32 v8, v2
-; GFX90A-NEXT: v_mov_b32_e32 v9, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v2i64__2_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: v_mov_b32_e32 v8, v2
-; GFX942-NEXT: v_mov_b32_e32 v9, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -3190,42 +2960,43 @@ define void @v_shuffle_v4i64_v2i64__3_u_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v2i64__3_u_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v2i64__3_u_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_u_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -3239,61 +3010,61 @@ define void @v_shuffle_v4i64_v2i64__3_0_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v2i64__3_0_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[6:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v6
+; GFX900-NEXT: v_mov_b32_e32 v5, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v2i64__3_0_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[6:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v6
+; GFX90A-NEXT: v_mov_b32_e32 v5, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_0_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[6:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v6
+; GFX942-NEXT: v_mov_b32_e32 v5, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -3308,18 +3079,19 @@ define void @v_shuffle_v4i64_v2i64__3_1_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
; GFX900-NEXT: v_mov_b32_e32 v4, v6
; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3327,18 +3099,19 @@ define void @v_shuffle_v4i64_v2i64__3_1_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
; GFX90A-NEXT: v_mov_b32_e32 v4, v6
; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3346,18 +3119,20 @@ define void @v_shuffle_v4i64_v2i64__3_1_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_mov_b32_e32 v4, v6
; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -3371,58 +3146,52 @@ define void @v_shuffle_v4i64_v2i64__3_2_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v2i64__3_2_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v4, v2
; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: v_mov_b32_e32 v6, v2
-; GFX900-NEXT: v_mov_b32_e32 v7, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
; GFX900-NEXT: s_nop 0
; GFX900-NEXT: v_mov_b32_e32 v4, v0
; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v2i64__3_2_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: v_mov_b32_e32 v8, v2
-; GFX90A-NEXT: v_mov_b32_e32 v9, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_2_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: v_mov_b32_e32 v8, v2
-; GFX942-NEXT: v_mov_b32_e32 v9, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16
; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -3436,45 +3205,42 @@ define void @v_shuffle_v4i64_v2i64__3_3_u_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v2i64__3_3_u_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v2i64__3_3_u_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_3_u_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -3489,16 +3255,19 @@ define void @v_shuffle_v4i64_v2i64__3_3_0_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: v_mov_b32_e32 v7, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3506,16 +3275,19 @@ define void @v_shuffle_v4i64_v2i64__3_3_0_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v2
+; GFX90A-NEXT: v_mov_b32_e32 v7, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3523,17 +3295,20 @@ define void @v_shuffle_v4i64_v2i64__3_3_0_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, v2
+; GFX942-NEXT: v_mov_b32_e32 v7, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -3550,17 +3325,16 @@ define void @v_shuffle_v4i64_v2i64__3_3_1_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v6
+; GFX900-NEXT: v_mov_b32_e32 v5, v7
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v2
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3570,17 +3344,16 @@ define void @v_shuffle_v4i64_v2i64__3_3_1_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v6
+; GFX90A-NEXT: v_mov_b32_e32 v5, v7
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v3
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3590,18 +3363,16 @@ define void @v_shuffle_v4i64_v2i64__3_3_1_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v6
+; GFX942-NEXT: v_mov_b32_e32 v5, v7
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v3
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
@@ -3615,45 +3386,42 @@ define void @v_shuffle_v4i64_v2i64__3_3_2_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v2i64__3_3_2_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v2i64__3_3_2_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_3_2_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=v"()
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v3i64.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v3i64.ll
index 615b382aa355a..97a9a0f94944d 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v3i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v3i64.ll
@@ -100,39 +100,33 @@ define void @v_shuffle_v4i64_v3i64__2_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v3i64__2_u_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v3i64__2_u_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v3i64__2_u_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -196,39 +190,33 @@ define void @v_shuffle_v4i64_v3i64__5_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_u_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_u_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_u_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -242,55 +230,42 @@ define void @v_shuffle_v4i64_v3i64__5_0_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_0_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:7]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_0_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:7]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_0_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:7]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -304,49 +279,43 @@ define void @v_shuffle_v4i64_v3i64__5_1_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_1_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
-; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v8
-; GFX900-NEXT: v_mov_b32_e32 v1, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_1_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
-; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v8
-; GFX90A-NEXT: v_mov_b32_e32 v1, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_1_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
-; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v8
-; GFX942-NEXT: v_mov_b32_e32 v1, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -360,49 +329,43 @@ define void @v_shuffle_v4i64_v3i64__5_2_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_2_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[2:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:11]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, v10
-; GFX900-NEXT: v_mov_b32_e32 v3, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_2_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[2:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:11]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, v10
-; GFX90A-NEXT: v_mov_b32_e32 v3, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_2_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:11]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v10
-; GFX942-NEXT: v_mov_b32_e32 v3, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -416,45 +379,40 @@ define void @v_shuffle_v4i64_v3i64__5_3_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_3_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: v_mov_b32_e32 v7, v1
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_3_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v0
+; GFX90A-NEXT: v_mov_b32_e32 v7, v1
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_3_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v0
+; GFX942-NEXT: v_mov_b32_e32 v7, v1
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -468,39 +426,40 @@ define void @v_shuffle_v4i64_v3i64__5_4_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_4_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: v_mov_b32_e32 v7, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_4_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v2
+; GFX90A-NEXT: v_mov_b32_e32 v7, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_4_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v2
+; GFX942-NEXT: v_mov_b32_e32 v7, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -514,39 +473,40 @@ define void @v_shuffle_v4i64_v3i64__5_5_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -560,51 +520,54 @@ define void @v_shuffle_v4i64_v3i64__5_5_0_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_0_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:7]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_0_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:7]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_0_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:7]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -618,51 +581,54 @@ define void @v_shuffle_v4i64_v3i64__5_5_1_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_1_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:9]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v8
-; GFX900-NEXT: v_mov_b32_e32 v7, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_1_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:9]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v8
-; GFX90A-NEXT: v_mov_b32_e32 v7, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_1_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
-; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v6, v8
-; GFX942-NEXT: v_mov_b32_e32 v7, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -676,57 +642,51 @@ define void @v_shuffle_v4i64_v3i64__5_5_2_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_2_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v12, v10
+; GFX900-NEXT: v_mov_b32_e32 v13, v11
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:11]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v8, v10
-; GFX900-NEXT: v_mov_b32_e32 v9, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_2_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v12, v10
+; GFX90A-NEXT: v_mov_b32_e32 v13, v11
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:11]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: v_mov_b32_e32 v8, v10
-; GFX90A-NEXT: v_mov_b32_e32 v9, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_2_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:11]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: v_mov_b32_e32 v8, v10
-; GFX942-NEXT: v_mov_b32_e32 v9, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v12, v10
+; GFX942-NEXT: v_mov_b32_e32 v13, v11
+; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -740,45 +700,42 @@ define void @v_shuffle_v4i64_v3i64__5_5_3_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_3_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_3_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_3_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -792,45 +749,42 @@ define void @v_shuffle_v4i64_v3i64__5_5_4_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_4_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_4_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_4_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -844,51 +798,45 @@ define void @v_shuffle_v4i64_v3i64__5_5_5_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_5_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_5_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_5_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -902,67 +850,54 @@ define void @v_shuffle_v4i64_v3i64__5_5_5_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_5_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:7]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17] offset:16
; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_5_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:7]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17] offset:16
; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_5_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:7]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1] offset:16
; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -976,58 +911,55 @@ define void @v_shuffle_v4i64_v3i64__5_5_5_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_5_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:9]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v8
-; GFX900-NEXT: v_mov_b32_e32 v1, v9
-; GFX900-NEXT: v_mov_b32_e32 v6, v8
-; GFX900-NEXT: v_mov_b32_e32 v7, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_5_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:9]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v8
-; GFX90A-NEXT: v_mov_b32_e32 v1, v9
-; GFX90A-NEXT: v_mov_b32_e32 v6, v8
-; GFX90A-NEXT: v_mov_b32_e32 v7, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_5_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
-; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v8
-; GFX942-NEXT: v_mov_b32_e32 v1, v9
-; GFX942-NEXT: v_mov_b32_e32 v6, v8
-; GFX942-NEXT: v_mov_b32_e32 v7, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -1041,57 +973,55 @@ define void @v_shuffle_v4i64_v3i64__5_5_5_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_5_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[2:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:11]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v10
-; GFX900-NEXT: v_mov_b32_e32 v3, v11
-; GFX900-NEXT: v_mov_b32_e32 v8, v10
-; GFX900-NEXT: v_mov_b32_e32 v9, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_5_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[2:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:11]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v10
-; GFX90A-NEXT: v_mov_b32_e32 v3, v11
-; GFX90A-NEXT: v_mov_b32_e32 v8, v10
-; GFX90A-NEXT: v_mov_b32_e32 v9, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_5_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:11]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v10
-; GFX942-NEXT: v_mov_b32_e32 v3, v11
-; GFX942-NEXT: v_mov_b32_e32 v8, v10
-; GFX942-NEXT: v_mov_b32_e32 v9, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -1105,54 +1035,52 @@ define void @v_shuffle_v4i64_v3i64__5_5_5_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_5_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: v_mov_b32_e32 v7, v1
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
; GFX900-NEXT: v_mov_b32_e32 v6, v4
; GFX900-NEXT: v_mov_b32_e32 v7, v5
-; GFX900-NEXT: v_mov_b32_e32 v8, v0
-; GFX900-NEXT: v_mov_b32_e32 v9, v1
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_5_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v4
-; GFX90A-NEXT: v_mov_b32_e32 v9, v5
-; GFX90A-NEXT: v_mov_b32_e32 v10, v0
-; GFX90A-NEXT: v_mov_b32_e32 v11, v1
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[8:11], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v0
+; GFX90A-NEXT: v_mov_b32_e32 v7, v1
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_5_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v4
-; GFX942-NEXT: v_mov_b32_e32 v9, v5
-; GFX942-NEXT: v_mov_b32_e32 v10, v0
-; GFX942-NEXT: v_mov_b32_e32 v11, v1
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[8:11], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v0
+; GFX942-NEXT: v_mov_b32_e32 v7, v1
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -1166,51 +1094,52 @@ define void @v_shuffle_v4i64_v3i64__5_5_5_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_5_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: v_mov_b32_e32 v7, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_5_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v6, v2
+; GFX90A-NEXT: v_mov_b32_e32 v7, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_5_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v2
+; GFX942-NEXT: v_mov_b32_e32 v7, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -1224,42 +1153,43 @@ define void @v_shuffle_v4i64_v3i64__5_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -1274,13 +1204,13 @@ define void @v_shuffle_v4i64_v3i64__u_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[2:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v0, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1288,13 +1218,13 @@ define void @v_shuffle_v4i64_v3i64__u_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[2:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v0, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1302,13 +1232,13 @@ define void @v_shuffle_v4i64_v3i64__u_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v0, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v0, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -1370,15 +1300,15 @@ define void @v_shuffle_v4i64_v3i64__1_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[2:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: v_mov_b32_e32 v6, v0
-; GFX900-NEXT: v_mov_b32_e32 v7, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, v2
+; GFX900-NEXT: v_mov_b32_e32 v1, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: v_mov_b32_e32 v7, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1386,17 +1316,15 @@ define void @v_shuffle_v4i64_v3i64__1_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[2:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v1
-; GFX90A-NEXT: v_mov_b32_e32 v10, v0
-; GFX90A-NEXT: v_mov_b32_e32 v11, v1
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[8:11], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v2
+; GFX90A-NEXT: v_mov_b32_e32 v1, v3
+; GFX90A-NEXT: v_mov_b32_e32 v6, v2
+; GFX90A-NEXT: v_mov_b32_e32 v7, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1404,17 +1332,15 @@ define void @v_shuffle_v4i64_v3i64__1_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
-; GFX942-NEXT: v_mov_b32_e32 v10, v0
-; GFX942-NEXT: v_mov_b32_e32 v11, v1
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[8:11], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v2
+; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: v_mov_b32_e32 v6, v2
+; GFX942-NEXT: v_mov_b32_e32 v7, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -1430,16 +1356,13 @@ define void @v_shuffle_v4i64_v3i64__2_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v0
; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: v_mov_b32_e32 v7, v1
+; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1449,16 +1372,13 @@ define void @v_shuffle_v4i64_v3i64__2_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v0
; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v0
+; GFX90A-NEXT: v_mov_b32_e32 v7, v1
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1468,16 +1388,13 @@ define void @v_shuffle_v4i64_v3i64__2_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v0
; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, v0
+; GFX942-NEXT: v_mov_b32_e32 v7, v1
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -1491,13 +1408,13 @@ define void @v_shuffle_v4i64_v3i64__3_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[2:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v0, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1505,13 +1422,13 @@ define void @v_shuffle_v4i64_v3i64__3_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[2:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v0, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1519,13 +1436,13 @@ define void @v_shuffle_v4i64_v3i64__3_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v0, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v0, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -1604,20 +1521,16 @@ define void @v_shuffle_v4i64_v3i64__5_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:7]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1625,20 +1538,16 @@ define void @v_shuffle_v4i64_v3i64__5_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:7]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1646,21 +1555,16 @@ define void @v_shuffle_v4i64_v3i64__5_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:7]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -1675,19 +1579,16 @@ define void @v_shuffle_v4i64_v3i64__5_u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:7]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1695,19 +1596,16 @@ define void @v_shuffle_v4i64_v3i64__5_u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:7]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1715,20 +1613,16 @@ define void @v_shuffle_v4i64_v3i64__5_u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:7]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -1742,64 +1636,58 @@ define void @v_shuffle_v4i64_v3i64__5_1_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_1_0_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[2:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v0, v2
+; GFX900-NEXT: v_mov_b32_e32 v1, v3
+; GFX900-NEXT: v_mov_b32_e32 v12, v4
+; GFX900-NEXT: v_mov_b32_e32 v13, v5
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:9]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: v_mov_b32_e32 v6, v0
-; GFX900-NEXT: v_mov_b32_e32 v7, v1
-; GFX900-NEXT: v_mov_b32_e32 v0, v8
-; GFX900-NEXT: v_mov_b32_e32 v1, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v14, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_1_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[2:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v0, v2
+; GFX90A-NEXT: v_mov_b32_e32 v1, v3
+; GFX90A-NEXT: v_mov_b32_e32 v12, v4
+; GFX90A-NEXT: v_mov_b32_e32 v13, v5
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:9]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: v_mov_b32_e32 v7, v1
-; GFX90A-NEXT: v_mov_b32_e32 v0, v8
-; GFX90A-NEXT: v_mov_b32_e32 v1, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v14, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_1_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v2
+; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: v_mov_b32_e32 v12, v4
+; GFX942-NEXT: v_mov_b32_e32 v13, v5
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:9]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
-; GFX942-NEXT: v_mov_b32_e32 v0, v8
-; GFX942-NEXT: v_mov_b32_e32 v1, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v14, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -1814,19 +1702,19 @@ define void @v_shuffle_v4i64_v3i64__5_2_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:11]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v10
-; GFX900-NEXT: v_mov_b32_e32 v3, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v10
+; GFX900-NEXT: v_mov_b32_e32 v7, v11
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1834,19 +1722,19 @@ define void @v_shuffle_v4i64_v3i64__5_2_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:11]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v10
-; GFX90A-NEXT: v_mov_b32_e32 v3, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v10
+; GFX90A-NEXT: v_mov_b32_e32 v7, v11
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1854,19 +1742,19 @@ define void @v_shuffle_v4i64_v3i64__5_2_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v12, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:11]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16
; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v10
-; GFX942-NEXT: v_mov_b32_e32 v3, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, v10
+; GFX942-NEXT: v_mov_b32_e32 v7, v11
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -1881,21 +1769,19 @@ define void @v_shuffle_v4i64_v3i64__5_3_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:9]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v8
-; GFX900-NEXT: v_mov_b32_e32 v1, v9
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: v_mov_b32_e32 v7, v1
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1903,21 +1789,19 @@ define void @v_shuffle_v4i64_v3i64__5_3_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:9]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v8
-; GFX90A-NEXT: v_mov_b32_e32 v1, v9
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v0
+; GFX90A-NEXT: v_mov_b32_e32 v7, v1
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1925,21 +1809,19 @@ define void @v_shuffle_v4i64_v3i64__5_3_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:9]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16
; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v0, v8
-; GFX942-NEXT: v_mov_b32_e32 v1, v9
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, v0
+; GFX942-NEXT: v_mov_b32_e32 v7, v1
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -1954,19 +1836,19 @@ define void @v_shuffle_v4i64_v3i64__5_4_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:7]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: v_mov_b32_e32 v7, v3
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1974,19 +1856,19 @@ define void @v_shuffle_v4i64_v3i64__5_4_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:7]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v2
+; GFX90A-NEXT: v_mov_b32_e32 v7, v3
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1994,20 +1876,19 @@ define void @v_shuffle_v4i64_v3i64__5_4_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:7]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16
; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, v2
+; GFX942-NEXT: v_mov_b32_e32 v7, v3
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -2022,18 +1903,19 @@ define void @v_shuffle_v4i64_v3i64__5_5_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:7]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2041,18 +1923,19 @@ define void @v_shuffle_v4i64_v3i64__5_5_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:7]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2060,19 +1943,19 @@ define void @v_shuffle_v4i64_v3i64__5_5_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:7]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -2087,18 +1970,16 @@ define void @v_shuffle_v4i64_v3i64__5_5_u_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[2:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:7]
+; GFX900-NEXT: ; def v[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v10, v8
+; GFX900-NEXT: v_mov_b32_e32 v11, v9
+; GFX900-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v0, v[8:11], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2106,18 +1987,16 @@ define void @v_shuffle_v4i64_v3i64__5_5_u_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[2:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:7]
+; GFX90A-NEXT: ; def v[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v10, v8
+; GFX90A-NEXT: v_mov_b32_e32 v11, v9
+; GFX90A-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v0, v[8:11], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2125,19 +2004,16 @@ define void @v_shuffle_v4i64_v3i64__5_5_u_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:7]
+; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v0, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v10, v8
+; GFX942-NEXT: v_mov_b32_e32 v11, v9
+; GFX942-NEXT: global_store_dwordx4 v0, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -2152,18 +2028,18 @@ define void @v_shuffle_v4i64_v3i64__5_5_1_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:9]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: v_mov_b32_e32 v6, v8
-; GFX900-NEXT: v_mov_b32_e32 v7, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v10, v6
+; GFX900-NEXT: v_mov_b32_e32 v11, v7
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2171,18 +2047,18 @@ define void @v_shuffle_v4i64_v3i64__5_5_1_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:9]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: v_mov_b32_e32 v6, v8
-; GFX90A-NEXT: v_mov_b32_e32 v7, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v10, v6
+; GFX90A-NEXT: v_mov_b32_e32 v11, v7
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2190,19 +2066,18 @@ define void @v_shuffle_v4i64_v3i64__5_5_1_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:9]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: v_mov_b32_e32 v6, v8
-; GFX942-NEXT: v_mov_b32_e32 v7, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v10, v6
+; GFX942-NEXT: v_mov_b32_e32 v11, v7
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -2217,20 +2092,18 @@ define void @v_shuffle_v4i64_v3i64__5_5_2_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:11]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: v_mov_b32_e32 v8, v10
-; GFX900-NEXT: v_mov_b32_e32 v9, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: v_mov_b32_e32 v7, v1
+; GFX900-NEXT: v_mov_b32_e32 v12, v10
+; GFX900-NEXT: v_mov_b32_e32 v13, v11
+; GFX900-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2238,20 +2111,18 @@ define void @v_shuffle_v4i64_v3i64__5_5_2_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:11]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: v_mov_b32_e32 v8, v10
-; GFX90A-NEXT: v_mov_b32_e32 v9, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v0
+; GFX90A-NEXT: v_mov_b32_e32 v7, v1
+; GFX90A-NEXT: v_mov_b32_e32 v12, v10
+; GFX90A-NEXT: v_mov_b32_e32 v13, v11
+; GFX90A-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2259,20 +2130,18 @@ define void @v_shuffle_v4i64_v3i64__5_5_2_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:11]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: v_mov_b32_e32 v8, v10
-; GFX942-NEXT: v_mov_b32_e32 v9, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v12, v10
+; GFX942-NEXT: v_mov_b32_e32 v6, v0
+; GFX942-NEXT: v_mov_b32_e32 v7, v1
+; GFX942-NEXT: v_mov_b32_e32 v13, v11
+; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -2287,19 +2156,18 @@ define void @v_shuffle_v4i64_v3i64__5_5_3_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:7]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v6
+; GFX900-NEXT: v_mov_b32_e32 v3, v7
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2307,19 +2175,18 @@ define void @v_shuffle_v4i64_v3i64__5_5_3_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:7]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v6
+; GFX90A-NEXT: v_mov_b32_e32 v3, v7
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2327,20 +2194,18 @@ define void @v_shuffle_v4i64_v3i64__5_5_3_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:7]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v6
+; GFX942-NEXT: v_mov_b32_e32 v3, v7
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -2355,21 +2220,18 @@ define void @v_shuffle_v4i64_v3i64__5_5_4_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[2:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:7]
+; GFX900-NEXT: ; def v[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, v6
+; GFX900-NEXT: v_mov_b32_e32 v1, v7
+; GFX900-NEXT: v_mov_b32_e32 v10, v8
+; GFX900-NEXT: v_mov_b32_e32 v11, v9
+; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2377,21 +2239,18 @@ define void @v_shuffle_v4i64_v3i64__5_5_4_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[2:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:7]
+; GFX90A-NEXT: ; def v[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v6
+; GFX90A-NEXT: v_mov_b32_e32 v1, v7
+; GFX90A-NEXT: v_mov_b32_e32 v10, v8
+; GFX90A-NEXT: v_mov_b32_e32 v11, v9
+; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2399,22 +2258,19 @@ define void @v_shuffle_v4i64_v3i64__5_5_4_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:7]
+; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v6
+; GFX942-NEXT: v_mov_b32_e32 v1, v7
+; GFX942-NEXT: v_mov_b32_e32 v10, v8
+; GFX942-NEXT: v_mov_b32_e32 v11, v9
+; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -2575,14 +2431,13 @@ define void @v_shuffle_v4i64_v3i64__2_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v2
; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: v_mov_b32_e32 v7, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2592,14 +2447,13 @@ define void @v_shuffle_v4i64_v3i64__2_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v2
; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v2
+; GFX90A-NEXT: v_mov_b32_e32 v7, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2609,14 +2463,13 @@ define void @v_shuffle_v4i64_v3i64__2_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v2
; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, v2
+; GFX942-NEXT: v_mov_b32_e32 v7, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -2743,18 +2596,16 @@ define void @v_shuffle_v4i64_v3i64__5_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v2
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: v_mov_b32_e32 v0, v8
-; GFX900-NEXT: v_mov_b32_e32 v1, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2762,18 +2613,16 @@ define void @v_shuffle_v4i64_v3i64__5_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v3
-; GFX90A-NEXT: v_mov_b32_e32 v0, v8
-; GFX90A-NEXT: v_mov_b32_e32 v1, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2781,19 +2630,16 @@ define void @v_shuffle_v4i64_v3i64__5_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:9]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, v8
-; GFX942-NEXT: v_mov_b32_e32 v1, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -2808,18 +2654,16 @@ define void @v_shuffle_v4i64_v3i64__5_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v2
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: v_mov_b32_e32 v0, v8
-; GFX900-NEXT: v_mov_b32_e32 v1, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2827,18 +2671,16 @@ define void @v_shuffle_v4i64_v3i64__5_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v3
-; GFX90A-NEXT: v_mov_b32_e32 v0, v8
-; GFX90A-NEXT: v_mov_b32_e32 v1, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2846,19 +2688,16 @@ define void @v_shuffle_v4i64_v3i64__5_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:9]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, v8
-; GFX942-NEXT: v_mov_b32_e32 v1, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -2873,21 +2712,16 @@ define void @v_shuffle_v4i64_v3i64__5_0_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, v8
+; GFX900-NEXT: v_mov_b32_e32 v11, v9
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:9]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v2
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v8
-; GFX900-NEXT: v_mov_b32_e32 v3, v9
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2895,21 +2729,16 @@ define void @v_shuffle_v4i64_v3i64__5_0_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, v8
+; GFX90A-NEXT: v_mov_b32_e32 v11, v9
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:9]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v3
-; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v8
-; GFX90A-NEXT: v_mov_b32_e32 v3, v9
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2917,22 +2746,16 @@ define void @v_shuffle_v4i64_v3i64__5_0_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, v8
+; GFX942-NEXT: v_mov_b32_e32 v11, v9
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:9]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v3
-; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v8
-; GFX942-NEXT: v_mov_b32_e32 v3, v9
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -2947,19 +2770,19 @@ define void @v_shuffle_v4i64_v3i64__5_2_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v8
+; GFX900-NEXT: v_mov_b32_e32 v7, v9
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:11]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v10
-; GFX900-NEXT: v_mov_b32_e32 v3, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v10
+; GFX900-NEXT: v_mov_b32_e32 v7, v11
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2967,19 +2790,19 @@ define void @v_shuffle_v4i64_v3i64__5_2_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
+; GFX90A-NEXT: v_mov_b32_e32 v6, v8
+; GFX90A-NEXT: v_mov_b32_e32 v7, v9
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:11]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v10
-; GFX90A-NEXT: v_mov_b32_e32 v3, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v10
+; GFX90A-NEXT: v_mov_b32_e32 v7, v11
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2987,19 +2810,19 @@ define void @v_shuffle_v4i64_v3i64__5_2_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v12, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: v_mov_b32_e32 v6, v8
+; GFX942-NEXT: v_mov_b32_e32 v7, v9
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:11]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16
; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v10
-; GFX942-NEXT: v_mov_b32_e32 v3, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, v10
+; GFX942-NEXT: v_mov_b32_e32 v7, v11
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -3014,20 +2837,19 @@ define void @v_shuffle_v4i64_v3i64__5_3_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v2
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:11]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v0, v10
-; GFX900-NEXT: v_mov_b32_e32 v1, v11
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: v_mov_b32_e32 v7, v1
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3035,20 +2857,19 @@ define void @v_shuffle_v4i64_v3i64__5_3_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:11]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v0, v10
-; GFX90A-NEXT: v_mov_b32_e32 v1, v11
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, v0
+; GFX90A-NEXT: v_mov_b32_e32 v7, v1
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3056,20 +2877,19 @@ define void @v_shuffle_v4i64_v3i64__5_3_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:11]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v0, v10
-; GFX942-NEXT: v_mov_b32_e32 v1, v11
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v6, v0
+; GFX942-NEXT: v_mov_b32_e32 v7, v1
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -3084,18 +2904,18 @@ define void @v_shuffle_v4i64_v3i64__5_4_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v2
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v8
-; GFX900-NEXT: v_mov_b32_e32 v5, v9
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: v_mov_b32_e32 v7, v3
; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
@@ -3104,18 +2924,18 @@ define void @v_shuffle_v4i64_v3i64__5_4_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v3
-; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v8
-; GFX90A-NEXT: v_mov_b32_e32 v5, v9
+; GFX90A-NEXT: v_mov_b32_e32 v6, v2
+; GFX90A-NEXT: v_mov_b32_e32 v7, v3
; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
@@ -3124,19 +2944,18 @@ define void @v_shuffle_v4i64_v3i64__5_4_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:9]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v3
-; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v4, v8
-; GFX942-NEXT: v_mov_b32_e32 v5, v9
+; GFX942-NEXT: v_mov_b32_e32 v6, v2
+; GFX942-NEXT: v_mov_b32_e32 v7, v3
; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3152,18 +2971,19 @@ define void @v_shuffle_v4i64_v3i64__5_5_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v2
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: v_mov_b32_e32 v6, v8
-; GFX900-NEXT: v_mov_b32_e32 v7, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3171,18 +2991,19 @@ define void @v_shuffle_v4i64_v3i64__5_5_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v3
-; GFX90A-NEXT: v_mov_b32_e32 v6, v8
-; GFX90A-NEXT: v_mov_b32_e32 v7, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3190,19 +3011,19 @@ define void @v_shuffle_v4i64_v3i64__5_5_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:9]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v3
-; GFX942-NEXT: v_mov_b32_e32 v6, v8
-; GFX942-NEXT: v_mov_b32_e32 v7, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -3216,51 +3037,55 @@ define void @v_shuffle_v4i64_v3i64__5_5_u_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_u_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:9]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v8
-; GFX900-NEXT: v_mov_b32_e32 v7, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_u_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:9]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v8
-; GFX90A-NEXT: v_mov_b32_e32 v7, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_u_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
-; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v6, v8
-; GFX942-NEXT: v_mov_b32_e32 v7, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -3274,51 +3099,54 @@ define void @v_shuffle_v4i64_v3i64__5_5_0_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_0_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:9]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v8
-; GFX900-NEXT: v_mov_b32_e32 v7, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_0_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:9]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v8
-; GFX90A-NEXT: v_mov_b32_e32 v7, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_0_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:9]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v6, v8
-; GFX942-NEXT: v_mov_b32_e32 v7, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -3333,18 +3161,18 @@ define void @v_shuffle_v4i64_v3i64__5_5_2_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:11]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v8, v10
-; GFX900-NEXT: v_mov_b32_e32 v9, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: v_mov_b32_e32 v7, v3
+; GFX900-NEXT: v_mov_b32_e32 v12, v10
+; GFX900-NEXT: v_mov_b32_e32 v13, v11
+; GFX900-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3352,18 +3180,18 @@ define void @v_shuffle_v4i64_v3i64__5_5_2_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:11]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: v_mov_b32_e32 v8, v10
-; GFX90A-NEXT: v_mov_b32_e32 v9, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v2
+; GFX90A-NEXT: v_mov_b32_e32 v7, v3
+; GFX90A-NEXT: v_mov_b32_e32 v12, v10
+; GFX90A-NEXT: v_mov_b32_e32 v13, v11
+; GFX90A-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3371,18 +3199,18 @@ define void @v_shuffle_v4i64_v3i64__5_5_2_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:11]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: v_mov_b32_e32 v8, v10
-; GFX942-NEXT: v_mov_b32_e32 v9, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v12, v10
+; GFX942-NEXT: v_mov_b32_e32 v6, v2
+; GFX942-NEXT: v_mov_b32_e32 v7, v3
+; GFX942-NEXT: v_mov_b32_e32 v13, v11
+; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -3397,39 +3225,37 @@ define void @v_shuffle_v4i64_v3i64__5_5_3_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:9]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v2
-; GFX900-NEXT: v_mov_b32_e32 v7, v3
-; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v8
-; GFX900-NEXT: v_mov_b32_e32 v7, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
-; GFX900-NEXT: s_waitcnt vmcnt(0)
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_3_1:
+; GFX900-NEXT: v_mov_b32_e32 v2, v6
+; GFX900-NEXT: v_mov_b32_e32 v3, v7
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_3_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:9]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v8
-; GFX90A-NEXT: v_mov_b32_e32 v7, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v6
+; GFX90A-NEXT: v_mov_b32_e32 v3, v7
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3437,20 +3263,19 @@ define void @v_shuffle_v4i64_v3i64__5_5_3_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:9]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v6, v8
-; GFX942-NEXT: v_mov_b32_e32 v7, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v6
+; GFX942-NEXT: v_mov_b32_e32 v3, v7
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -3465,18 +3290,19 @@ define void @v_shuffle_v4i64_v3i64__5_5_4_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:9]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v6, v8
-; GFX900-NEXT: v_mov_b32_e32 v7, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: v_mov_b32_e32 v7, v3
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3484,18 +3310,19 @@ define void @v_shuffle_v4i64_v3i64__5_5_4_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:9]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: v_mov_b32_e32 v6, v8
-; GFX90A-NEXT: v_mov_b32_e32 v7, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v2
+; GFX90A-NEXT: v_mov_b32_e32 v7, v3
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3503,19 +3330,20 @@ define void @v_shuffle_v4i64_v3i64__5_5_4_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:9]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: v_mov_b32_e32 v6, v8
-; GFX942-NEXT: v_mov_b32_e32 v7, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, v2
+; GFX942-NEXT: v_mov_b32_e32 v7, v3
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -3529,42 +3357,43 @@ define void @v_shuffle_v4i64_v3i64__u_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v3i64__u_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v3i64__u_2_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v3i64__u_2_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -3580,11 +3409,13 @@ define void @v_shuffle_v4i64_v3i64__0_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
; GFX900-NEXT: v_mov_b32_e32 v2, v4
; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3594,11 +3425,13 @@ define void @v_shuffle_v4i64_v3i64__0_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
; GFX90A-NEXT: v_mov_b32_e32 v2, v4
; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3608,11 +3441,13 @@ define void @v_shuffle_v4i64_v3i64__0_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
; GFX942-NEXT: v_mov_b32_e32 v2, v4
; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -3625,49 +3460,43 @@ define void @v_shuffle_v4i64_v3i64__1_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v3i64__1_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v6, v4
; GFX900-NEXT: v_mov_b32_e32 v7, v5
-; GFX900-NEXT: v_mov_b32_e32 v8, v4
-; GFX900-NEXT: v_mov_b32_e32 v9, v5
-; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v3i64__1_2_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, v4
-; GFX90A-NEXT: v_mov_b32_e32 v9, v5
-; GFX90A-NEXT: v_mov_b32_e32 v10, v4
-; GFX90A-NEXT: v_mov_b32_e32 v11, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[8:11], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v3i64__1_2_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v4
-; GFX942-NEXT: v_mov_b32_e32 v9, v5
-; GFX942-NEXT: v_mov_b32_e32 v10, v4
-; GFX942-NEXT: v_mov_b32_e32 v11, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[8:11], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -3680,42 +3509,43 @@ define void @v_shuffle_v4i64_v3i64__2_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v3i64__2_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v3i64__2_2_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v3i64__2_2_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -3728,42 +3558,43 @@ define void @v_shuffle_v4i64_v3i64__3_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v3i64__3_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v3i64__3_2_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v3i64__3_2_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -3777,17 +3608,17 @@ define void @v_shuffle_v4i64_v3i64__4_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
; GFX900-NEXT: v_mov_b32_e32 v10, v4
; GFX900-NEXT: v_mov_b32_e32 v11, v5
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17] offset:16
; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
@@ -3796,17 +3627,17 @@ define void @v_shuffle_v4i64_v3i64__4_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
; GFX90A-NEXT: v_mov_b32_e32 v10, v4
; GFX90A-NEXT: v_mov_b32_e32 v11, v5
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17] offset:16
; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
@@ -3815,17 +3646,18 @@ define void @v_shuffle_v4i64_v3i64__4_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v12, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
; GFX942-NEXT: v_mov_b32_e32 v10, v4
; GFX942-NEXT: v_mov_b32_e32 v11, v5
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3841,19 +3673,16 @@ define void @v_shuffle_v4i64_v3i64__5_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[2:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:11]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v10
-; GFX900-NEXT: v_mov_b32_e32 v3, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3861,19 +3690,16 @@ define void @v_shuffle_v4i64_v3i64__5_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[2:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:11]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v10
-; GFX90A-NEXT: v_mov_b32_e32 v3, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3881,19 +3707,16 @@ define void @v_shuffle_v4i64_v3i64__5_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:11]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v10
-; GFX942-NEXT: v_mov_b32_e32 v3, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -3908,18 +3731,16 @@ define void @v_shuffle_v4i64_v3i64__5_u_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:11]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v0, v10
-; GFX900-NEXT: v_mov_b32_e32 v1, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[10:13], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3927,18 +3748,16 @@ define void @v_shuffle_v4i64_v3i64__5_u_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:11]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v0, v10
-; GFX90A-NEXT: v_mov_b32_e32 v1, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[10:13], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3946,18 +3765,17 @@ define void @v_shuffle_v4i64_v3i64__5_u_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:11]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v0, v10
-; GFX942-NEXT: v_mov_b32_e32 v1, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -3971,66 +3789,52 @@ define void @v_shuffle_v4i64_v3i64__5_0_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_0_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
+; GFX900-NEXT: v_mov_b32_e32 v12, v10
+; GFX900-NEXT: v_mov_b32_e32 v13, v11
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:11]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v10
-; GFX900-NEXT: v_mov_b32_e32 v3, v11
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_0_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
+; GFX90A-NEXT: v_mov_b32_e32 v12, v10
+; GFX90A-NEXT: v_mov_b32_e32 v13, v11
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:11]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v10
-; GFX90A-NEXT: v_mov_b32_e32 v3, v11
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_0_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:11]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v10
-; GFX942-NEXT: v_mov_b32_e32 v3, v11
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v12, v10
+; GFX942-NEXT: v_mov_b32_e32 v13, v11
+; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -4044,63 +3848,52 @@ define void @v_shuffle_v4i64_v3i64__5_1_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_1_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[4:9]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v10, v8
+; GFX900-NEXT: v_mov_b32_e32 v11, v9
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:11]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v4
-; GFX900-NEXT: v_mov_b32_e32 v7, v5
-; GFX900-NEXT: v_mov_b32_e32 v8, v4
-; GFX900-NEXT: v_mov_b32_e32 v9, v5
-; GFX900-NEXT: v_mov_b32_e32 v0, v10
-; GFX900-NEXT: v_mov_b32_e32 v1, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_1_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[4:9]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v10, v8
+; GFX90A-NEXT: v_mov_b32_e32 v11, v9
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:11]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v5
-; GFX90A-NEXT: v_mov_b32_e32 v8, v4
-; GFX90A-NEXT: v_mov_b32_e32 v9, v5
-; GFX90A-NEXT: v_mov_b32_e32 v0, v10
-; GFX90A-NEXT: v_mov_b32_e32 v1, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_1_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v10, v8
+; GFX942-NEXT: v_mov_b32_e32 v11, v9
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:11]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v5
-; GFX942-NEXT: v_mov_b32_e32 v8, v4
-; GFX942-NEXT: v_mov_b32_e32 v9, v5
-; GFX942-NEXT: v_mov_b32_e32 v0, v10
-; GFX942-NEXT: v_mov_b32_e32 v1, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -4114,63 +3907,58 @@ define void @v_shuffle_v4i64_v3i64__5_3_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_3_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:11]
+; GFX900-NEXT: ; def v[8:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v0, v10
-; GFX900-NEXT: v_mov_b32_e32 v1, v11
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: v_mov_b32_e32 v14, v8
+; GFX900-NEXT: v_mov_b32_e32 v15, v9
+; GFX900-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_3_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:11]
+; GFX90A-NEXT: ; def v[8:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v0, v10
-; GFX90A-NEXT: v_mov_b32_e32 v1, v11
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: v_mov_b32_e32 v14, v8
+; GFX90A-NEXT: v_mov_b32_e32 v15, v9
+; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_3_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:11]
+; GFX942-NEXT: ; def v[8:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v0, v10
-; GFX942-NEXT: v_mov_b32_e32 v1, v11
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: v_mov_b32_e32 v14, v8
+; GFX942-NEXT: v_mov_b32_e32 v15, v9
+; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -4185,18 +3973,18 @@ define void @v_shuffle_v4i64_v3i64__5_4_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:11]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v6, v10
-; GFX900-NEXT: v_mov_b32_e32 v7, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: v_mov_b32_e32 v12, v8
+; GFX900-NEXT: v_mov_b32_e32 v13, v9
+; GFX900-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4204,18 +3992,18 @@ define void @v_shuffle_v4i64_v3i64__5_4_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:11]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v6, v10
-; GFX90A-NEXT: v_mov_b32_e32 v7, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: v_mov_b32_e32 v12, v8
+; GFX90A-NEXT: v_mov_b32_e32 v13, v9
+; GFX90A-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4223,18 +4011,18 @@ define void @v_shuffle_v4i64_v3i64__5_4_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:11]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v6, v10
-; GFX942-NEXT: v_mov_b32_e32 v7, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v12, v8
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: v_mov_b32_e32 v13, v9
+; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -4249,18 +4037,18 @@ define void @v_shuffle_v4i64_v3i64__5_5_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:11]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v8, v10
-; GFX900-NEXT: v_mov_b32_e32 v9, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: v_mov_b32_e32 v12, v10
+; GFX900-NEXT: v_mov_b32_e32 v13, v11
+; GFX900-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4268,18 +4056,18 @@ define void @v_shuffle_v4i64_v3i64__5_5_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:11]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v8, v10
-; GFX90A-NEXT: v_mov_b32_e32 v9, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: v_mov_b32_e32 v12, v10
+; GFX90A-NEXT: v_mov_b32_e32 v13, v11
+; GFX90A-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4287,18 +4075,18 @@ define void @v_shuffle_v4i64_v3i64__5_5_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:11]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v8, v10
-; GFX942-NEXT: v_mov_b32_e32 v9, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v12, v10
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: v_mov_b32_e32 v13, v11
+; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -4312,51 +4100,55 @@ define void @v_shuffle_v4i64_v3i64__5_5_u_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_u_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:11]
+; GFX900-NEXT: ; def v[2:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v8, v10
-; GFX900-NEXT: v_mov_b32_e32 v9, v11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_u_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:11]
+; GFX90A-NEXT: ; def v[2:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v10
-; GFX90A-NEXT: v_mov_b32_e32 v9, v11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_u_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:11]
+; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v10
-; GFX942-NEXT: v_mov_b32_e32 v9, v11
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -4371,18 +4163,19 @@ define void @v_shuffle_v4i64_v3i64__5_5_0_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
; GFX900-NEXT: v_mov_b32_e32 v8, v10
; GFX900-NEXT: v_mov_b32_e32 v9, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4390,18 +4183,19 @@ define void @v_shuffle_v4i64_v3i64__5_5_0_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
; GFX90A-NEXT: v_mov_b32_e32 v8, v10
; GFX90A-NEXT: v_mov_b32_e32 v9, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4409,18 +4203,19 @@ define void @v_shuffle_v4i64_v3i64__5_5_0_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v12, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
; GFX942-NEXT: v_mov_b32_e32 v8, v10
; GFX942-NEXT: v_mov_b32_e32 v9, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -4434,51 +4229,54 @@ define void @v_shuffle_v4i64_v3i64__5_5_1_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_1_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:11]
+; GFX900-NEXT: ; def v[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v8, v10
-; GFX900-NEXT: v_mov_b32_e32 v9, v11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_1_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:11]
+; GFX90A-NEXT: ; def v[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v10
-; GFX90A-NEXT: v_mov_b32_e32 v9, v11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_1_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:11]
+; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v10
-; GFX942-NEXT: v_mov_b32_e32 v9, v11
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -4493,19 +4291,18 @@ define void @v_shuffle_v4i64_v3i64__5_5_3_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:11]
+; GFX900-NEXT: ; def v[2:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, v4
-; GFX900-NEXT: v_mov_b32_e32 v9, v5
-; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v8, v10
-; GFX900-NEXT: v_mov_b32_e32 v9, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v6
+; GFX900-NEXT: v_mov_b32_e32 v3, v7
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4513,19 +4310,18 @@ define void @v_shuffle_v4i64_v3i64__5_5_3_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:11]
+; GFX90A-NEXT: ; def v[2:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, v4
-; GFX90A-NEXT: v_mov_b32_e32 v9, v5
-; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v10
-; GFX90A-NEXT: v_mov_b32_e32 v9, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v6
+; GFX90A-NEXT: v_mov_b32_e32 v3, v7
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4533,20 +4329,19 @@ define void @v_shuffle_v4i64_v3i64__5_5_3_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:11]
+; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v4
-; GFX942-NEXT: v_mov_b32_e32 v9, v5
-; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v8, v10
-; GFX942-NEXT: v_mov_b32_e32 v9, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v6
+; GFX942-NEXT: v_mov_b32_e32 v3, v7
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -4561,18 +4356,19 @@ define void @v_shuffle_v4i64_v3i64__5_5_4_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[4:9]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:11]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v8
-; GFX900-NEXT: v_mov_b32_e32 v3, v9
-; GFX900-NEXT: v_mov_b32_e32 v8, v10
-; GFX900-NEXT: v_mov_b32_e32 v9, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: v_mov_b32_e32 v7, v3
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4580,18 +4376,19 @@ define void @v_shuffle_v4i64_v3i64__5_5_4_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[4:9]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:11]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v8
-; GFX90A-NEXT: v_mov_b32_e32 v3, v9
-; GFX90A-NEXT: v_mov_b32_e32 v8, v10
-; GFX90A-NEXT: v_mov_b32_e32 v9, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v2
+; GFX90A-NEXT: v_mov_b32_e32 v7, v3
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4599,18 +4396,20 @@ define void @v_shuffle_v4i64_v3i64__5_5_4_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:11]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v8
-; GFX942-NEXT: v_mov_b32_e32 v3, v9
-; GFX942-NEXT: v_mov_b32_e32 v8, v10
-; GFX942-NEXT: v_mov_b32_e32 v9, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v2
+; GFX942-NEXT: v_mov_b32_e32 v7, v3
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -4716,39 +4515,33 @@ define void @v_shuffle_v4i64_v3i64__2_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v3i64__2_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v3i64__2_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v3i64__2_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -4773,15 +4566,15 @@ define void @v_shuffle_v4i64_v3i64__4_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[2:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: v_mov_b32_e32 v6, v0
-; GFX900-NEXT: v_mov_b32_e32 v7, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, v2
+; GFX900-NEXT: v_mov_b32_e32 v1, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: v_mov_b32_e32 v7, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4789,17 +4582,15 @@ define void @v_shuffle_v4i64_v3i64__4_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[2:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v1
-; GFX90A-NEXT: v_mov_b32_e32 v10, v0
-; GFX90A-NEXT: v_mov_b32_e32 v11, v1
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[8:11], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v2
+; GFX90A-NEXT: v_mov_b32_e32 v1, v3
+; GFX90A-NEXT: v_mov_b32_e32 v6, v2
+; GFX90A-NEXT: v_mov_b32_e32 v7, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4807,17 +4598,15 @@ define void @v_shuffle_v4i64_v3i64__4_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
-; GFX942-NEXT: v_mov_b32_e32 v10, v0
-; GFX942-NEXT: v_mov_b32_e32 v11, v1
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[8:11], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v2
+; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: v_mov_b32_e32 v6, v2
+; GFX942-NEXT: v_mov_b32_e32 v7, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -4834,16 +4623,13 @@ define void @v_shuffle_v4i64_v3i64__5_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v0
; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: v_mov_b32_e32 v7, v1
+; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4853,16 +4639,13 @@ define void @v_shuffle_v4i64_v3i64__5_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v0
; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v0
+; GFX90A-NEXT: v_mov_b32_e32 v7, v1
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4872,16 +4655,13 @@ define void @v_shuffle_v4i64_v3i64__5_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v0
; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, v0
+; GFX942-NEXT: v_mov_b32_e32 v7, v1
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -4902,10 +4682,7 @@ define void @v_shuffle_v4i64_v3i64__5_u_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: v_mov_b32_e32 v2, v0
; GFX900-NEXT: v_mov_b32_e32 v3, v1
; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4919,10 +4696,7 @@ define void @v_shuffle_v4i64_v3i64__5_u_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: v_mov_b32_e32 v2, v0
; GFX90A-NEXT: v_mov_b32_e32 v3, v1
; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4936,10 +4710,7 @@ define void @v_shuffle_v4i64_v3i64__5_u_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: v_mov_b32_e32 v2, v0
; GFX942-NEXT: v_mov_b32_e32 v3, v1
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -4956,19 +4727,14 @@ define void @v_shuffle_v4i64_v3i64__5_0_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:7]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v2
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4978,19 +4744,14 @@ define void @v_shuffle_v4i64_v3i64__5_0_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v1
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:7]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v3
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5000,20 +4761,14 @@ define void @v_shuffle_v4i64_v3i64__5_0_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v0
+; GFX942-NEXT: v_mov_b32_e32 v3, v1
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:7]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v3
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -5028,18 +4783,16 @@ define void @v_shuffle_v4i64_v3i64__5_1_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:9]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v4
-; GFX900-NEXT: v_mov_b32_e32 v7, v5
-; GFX900-NEXT: v_mov_b32_e32 v0, v8
-; GFX900-NEXT: v_mov_b32_e32 v1, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5047,18 +4800,16 @@ define void @v_shuffle_v4i64_v3i64__5_1_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:9]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v5
-; GFX90A-NEXT: v_mov_b32_e32 v0, v8
-; GFX90A-NEXT: v_mov_b32_e32 v1, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5066,19 +4817,17 @@ define void @v_shuffle_v4i64_v3i64__5_1_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:9]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v5
-; GFX942-NEXT: v_mov_b32_e32 v0, v8
-; GFX942-NEXT: v_mov_b32_e32 v1, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v0
+; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -5093,18 +4842,16 @@ define void @v_shuffle_v4i64_v3i64__5_2_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[2:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:11]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v8, v6
-; GFX900-NEXT: v_mov_b32_e32 v9, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v10
-; GFX900-NEXT: v_mov_b32_e32 v3, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5112,18 +4859,16 @@ define void @v_shuffle_v4i64_v3i64__5_2_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[2:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:11]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v6
-; GFX90A-NEXT: v_mov_b32_e32 v9, v7
-; GFX90A-NEXT: v_mov_b32_e32 v2, v10
-; GFX90A-NEXT: v_mov_b32_e32 v3, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5131,18 +4876,17 @@ define void @v_shuffle_v4i64_v3i64__5_2_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:11]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v6
-; GFX942-NEXT: v_mov_b32_e32 v9, v7
-; GFX942-NEXT: v_mov_b32_e32 v2, v10
-; GFX942-NEXT: v_mov_b32_e32 v3, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v0
+; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -5156,54 +4900,49 @@ define void @v_shuffle_v4i64_v3i64__5_4_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_4_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[2:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v0
-; GFX900-NEXT: v_mov_b32_e32 v7, v1
-; GFX900-NEXT: v_mov_b32_e32 v8, v0
-; GFX900-NEXT: v_mov_b32_e32 v9, v1
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, v2
+; GFX900-NEXT: v_mov_b32_e32 v1, v3
+; GFX900-NEXT: v_mov_b32_e32 v8, v4
+; GFX900-NEXT: v_mov_b32_e32 v9, v5
+; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_4_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[2:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v1
-; GFX90A-NEXT: v_mov_b32_e32 v10, v0
-; GFX90A-NEXT: v_mov_b32_e32 v11, v1
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[8:11], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v2
+; GFX90A-NEXT: v_mov_b32_e32 v1, v3
+; GFX90A-NEXT: v_mov_b32_e32 v8, v4
+; GFX90A-NEXT: v_mov_b32_e32 v9, v5
+; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_4_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
-; GFX942-NEXT: v_mov_b32_e32 v10, v0
-; GFX942-NEXT: v_mov_b32_e32 v11, v1
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[8:11], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v2
+; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: v_mov_b32_e32 v8, v4
+; GFX942-NEXT: v_mov_b32_e32 v9, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -5220,14 +4959,13 @@ define void @v_shuffle_v4i64_v3i64__5_5_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v0
; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5237,14 +4975,13 @@ define void @v_shuffle_v4i64_v3i64__5_5_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v0
; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5254,14 +4991,13 @@ define void @v_shuffle_v4i64_v3i64__5_5_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v0
; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -5275,51 +5011,42 @@ define void @v_shuffle_v4i64_v3i64__5_5_u_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_u_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[2:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v0, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_u_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[2:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v0, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_u_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v0, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v0, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -5336,14 +5063,14 @@ define void @v_shuffle_v4i64_v3i64__5_5_0_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[2:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5353,14 +5080,14 @@ define void @v_shuffle_v4i64_v3i64__5_5_0_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5370,14 +5097,14 @@ define void @v_shuffle_v4i64_v3i64__5_5_0_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -5394,14 +5121,14 @@ define void @v_shuffle_v4i64_v3i64__5_5_1_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v8
-; GFX900-NEXT: v_mov_b32_e32 v7, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v10, v8
+; GFX900-NEXT: v_mov_b32_e32 v11, v9
+; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5411,14 +5138,14 @@ define void @v_shuffle_v4i64_v3i64__5_5_1_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v8
-; GFX90A-NEXT: v_mov_b32_e32 v7, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v10, v8
+; GFX90A-NEXT: v_mov_b32_e32 v11, v9
+; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5428,14 +5155,14 @@ define void @v_shuffle_v4i64_v3i64__5_5_1_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v6, v8
-; GFX942-NEXT: v_mov_b32_e32 v7, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v10, v8
+; GFX942-NEXT: v_mov_b32_e32 v11, v9
+; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -5449,63 +5176,51 @@ define void @v_shuffle_v4i64_v3i64__5_5_2_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_2_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v12, v10
+; GFX900-NEXT: v_mov_b32_e32 v13, v11
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:11]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: v_mov_b32_e32 v8, v10
-; GFX900-NEXT: v_mov_b32_e32 v9, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_2_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v12, v10
+; GFX90A-NEXT: v_mov_b32_e32 v13, v11
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:11]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v8, v10
-; GFX90A-NEXT: v_mov_b32_e32 v9, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_2_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:11]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v8, v10
-; GFX942-NEXT: v_mov_b32_e32 v9, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v12, v10
+; GFX942-NEXT: v_mov_b32_e32 v13, v11
+; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -5519,54 +5234,49 @@ define void @v_shuffle_v4i64_v3i64__5_5_4_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_4_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[2:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v2
-; GFX900-NEXT: v_mov_b32_e32 v7, v3
-; GFX900-NEXT: v_mov_b32_e32 v8, v0
-; GFX900-NEXT: v_mov_b32_e32 v9, v1
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, v4
+; GFX900-NEXT: v_mov_b32_e32 v1, v5
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_4_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[2:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v2
-; GFX90A-NEXT: v_mov_b32_e32 v9, v3
-; GFX90A-NEXT: v_mov_b32_e32 v10, v0
-; GFX90A-NEXT: v_mov_b32_e32 v11, v1
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[8:11], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v4
+; GFX90A-NEXT: v_mov_b32_e32 v1, v5
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_4_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v2
-; GFX942-NEXT: v_mov_b32_e32 v9, v3
-; GFX942-NEXT: v_mov_b32_e32 v10, v0
-; GFX942-NEXT: v_mov_b32_e32 v11, v1
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[8:11], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v4
+; GFX942-NEXT: v_mov_b32_e32 v1, v5
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -5763,15 +5473,15 @@ define void @v_shuffle_v4i64_v3i64__2_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, v8
+; GFX900-NEXT: v_mov_b32_e32 v11, v9
+; GFX900-NEXT: v_mov_b32_e32 v6, v8
+; GFX900-NEXT: v_mov_b32_e32 v7, v9
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, v8
-; GFX900-NEXT: v_mov_b32_e32 v11, v9
-; GFX900-NEXT: v_mov_b32_e32 v6, v4
-; GFX900-NEXT: v_mov_b32_e32 v7, v5
; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5782,15 +5492,15 @@ define void @v_shuffle_v4i64_v3i64__2_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, v8
+; GFX90A-NEXT: v_mov_b32_e32 v11, v9
+; GFX90A-NEXT: v_mov_b32_e32 v6, v8
+; GFX90A-NEXT: v_mov_b32_e32 v7, v9
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, v8
-; GFX90A-NEXT: v_mov_b32_e32 v11, v9
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v5
; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5801,15 +5511,15 @@ define void @v_shuffle_v4i64_v3i64__2_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, v8
+; GFX942-NEXT: v_mov_b32_e32 v11, v9
+; GFX942-NEXT: v_mov_b32_e32 v6, v8
+; GFX942-NEXT: v_mov_b32_e32 v7, v9
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, v8
-; GFX942-NEXT: v_mov_b32_e32 v11, v9
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v5
; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -5924,14 +5634,13 @@ define void @v_shuffle_v4i64_v3i64__5_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v2
; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: v_mov_b32_e32 v7, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5941,14 +5650,13 @@ define void @v_shuffle_v4i64_v3i64__5_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v2
; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v2
+; GFX90A-NEXT: v_mov_b32_e32 v7, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5958,14 +5666,13 @@ define void @v_shuffle_v4i64_v3i64__5_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v2
; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, v2
+; GFX942-NEXT: v_mov_b32_e32 v7, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -5986,10 +5693,7 @@ define void @v_shuffle_v4i64_v3i64__5_u_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: v_mov_b32_e32 v0, v2
; GFX900-NEXT: v_mov_b32_e32 v1, v3
; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6003,10 +5707,7 @@ define void @v_shuffle_v4i64_v3i64__5_u_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: v_mov_b32_e32 v0, v2
; GFX90A-NEXT: v_mov_b32_e32 v1, v3
; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6020,10 +5721,7 @@ define void @v_shuffle_v4i64_v3i64__5_u_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: v_mov_b32_e32 v0, v2
; GFX942-NEXT: v_mov_b32_e32 v1, v3
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -6040,19 +5738,14 @@ define void @v_shuffle_v4i64_v3i64__5_0_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v0, v2
+; GFX900-NEXT: v_mov_b32_e32 v1, v3
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:7]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6062,19 +5755,14 @@ define void @v_shuffle_v4i64_v3i64__5_0_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v2
+; GFX90A-NEXT: v_mov_b32_e32 v1, v3
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:7]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6084,20 +5772,14 @@ define void @v_shuffle_v4i64_v3i64__5_0_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v2
+; GFX942-NEXT: v_mov_b32_e32 v1, v3
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:7]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -6112,18 +5794,16 @@ define void @v_shuffle_v4i64_v3i64__5_1_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:9]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: v_mov_b32_e32 v0, v8
-; GFX900-NEXT: v_mov_b32_e32 v1, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, v2
+; GFX900-NEXT: v_mov_b32_e32 v1, v3
+; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6131,18 +5811,16 @@ define void @v_shuffle_v4i64_v3i64__5_1_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:9]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: v_mov_b32_e32 v0, v8
-; GFX90A-NEXT: v_mov_b32_e32 v1, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v2
+; GFX90A-NEXT: v_mov_b32_e32 v1, v3
+; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6150,19 +5828,17 @@ define void @v_shuffle_v4i64_v3i64__5_1_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:9]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: v_mov_b32_e32 v0, v8
-; GFX942-NEXT: v_mov_b32_e32 v1, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v2
+; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -6177,18 +5853,16 @@ define void @v_shuffle_v4i64_v3i64__5_2_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[2:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:11]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v8
-; GFX900-NEXT: v_mov_b32_e32 v7, v9
-; GFX900-NEXT: v_mov_b32_e32 v2, v10
-; GFX900-NEXT: v_mov_b32_e32 v3, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v0, v2
+; GFX900-NEXT: v_mov_b32_e32 v1, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6196,18 +5870,16 @@ define void @v_shuffle_v4i64_v3i64__5_2_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[2:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:11]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v8
-; GFX90A-NEXT: v_mov_b32_e32 v7, v9
-; GFX90A-NEXT: v_mov_b32_e32 v2, v10
-; GFX90A-NEXT: v_mov_b32_e32 v3, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v2
+; GFX90A-NEXT: v_mov_b32_e32 v1, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6215,18 +5887,17 @@ define void @v_shuffle_v4i64_v3i64__5_2_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:11]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v8
-; GFX942-NEXT: v_mov_b32_e32 v7, v9
-; GFX942-NEXT: v_mov_b32_e32 v2, v10
-; GFX942-NEXT: v_mov_b32_e32 v3, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v2
+; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -6240,60 +5911,56 @@ define void @v_shuffle_v4i64_v3i64__5_3_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_3_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: v_mov_b32_e32 v6, v2
; GFX900-NEXT: v_mov_b32_e32 v7, v3
; GFX900-NEXT: v_mov_b32_e32 v8, v2
; GFX900-NEXT: v_mov_b32_e32 v9, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: v_mov_b32_e32 v7, v1
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_3_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v2
-; GFX90A-NEXT: v_mov_b32_e32 v9, v3
; GFX90A-NEXT: v_mov_b32_e32 v10, v2
; GFX90A-NEXT: v_mov_b32_e32 v11, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[8:11], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v12, v2
+; GFX90A-NEXT: v_mov_b32_e32 v13, v3
+; GFX90A-NEXT: v_mov_b32_e32 v6, v0
+; GFX90A-NEXT: v_mov_b32_e32 v7, v1
+; GFX90A-NEXT: global_store_dwordx4 v8, v[10:13], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_3_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v2
-; GFX942-NEXT: v_mov_b32_e32 v9, v3
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_mov_b32_e32 v10, v2
; GFX942-NEXT: v_mov_b32_e32 v11, v3
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[8:11], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v12, v2
+; GFX942-NEXT: v_mov_b32_e32 v13, v3
+; GFX942-NEXT: v_mov_b32_e32 v6, v0
+; GFX942-NEXT: v_mov_b32_e32 v7, v1
+; GFX942-NEXT: global_store_dwordx4 v8, v[10:13], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -6310,14 +5977,13 @@ define void @v_shuffle_v4i64_v3i64__5_5_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v2
; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6327,14 +5993,13 @@ define void @v_shuffle_v4i64_v3i64__5_5_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v2
; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6344,14 +6009,13 @@ define void @v_shuffle_v4i64_v3i64__5_5_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v2
; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -6365,45 +6029,42 @@ define void @v_shuffle_v4i64_v3i64__5_5_u_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_u_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_u_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_u_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -6418,18 +6079,19 @@ define void @v_shuffle_v4i64_v3i64__5_5_0_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:7]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v2
+; GFX900-NEXT: v_mov_b32_e32 v9, v3
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6437,18 +6099,19 @@ define void @v_shuffle_v4i64_v3i64__5_5_0_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:7]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v2
+; GFX90A-NEXT: v_mov_b32_e32 v9, v3
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6456,19 +6119,20 @@ define void @v_shuffle_v4i64_v3i64__5_5_0_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:7]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v2
+; GFX942-NEXT: v_mov_b32_e32 v9, v3
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -6483,18 +6147,19 @@ define void @v_shuffle_v4i64_v3i64__5_5_1_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: v_mov_b32_e32 v6, v8
-; GFX900-NEXT: v_mov_b32_e32 v7, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, v2
+; GFX900-NEXT: v_mov_b32_e32 v9, v3
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6502,18 +6167,19 @@ define void @v_shuffle_v4i64_v3i64__5_5_1_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: v_mov_b32_e32 v6, v8
-; GFX90A-NEXT: v_mov_b32_e32 v7, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, v2
+; GFX90A-NEXT: v_mov_b32_e32 v9, v3
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6521,19 +6187,20 @@ define void @v_shuffle_v4i64_v3i64__5_5_1_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:9]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: v_mov_b32_e32 v6, v8
-; GFX942-NEXT: v_mov_b32_e32 v7, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v2
+; GFX942-NEXT: v_mov_b32_e32 v9, v3
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -6550,17 +6217,16 @@ define void @v_shuffle_v4i64_v3i64__5_5_2_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v8
+; GFX900-NEXT: v_mov_b32_e32 v7, v9
+; GFX900-NEXT: v_mov_b32_e32 v12, v10
+; GFX900-NEXT: v_mov_b32_e32 v13, v11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, v4
-; GFX900-NEXT: v_mov_b32_e32 v7, v5
-; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v8, v10
-; GFX900-NEXT: v_mov_b32_e32 v9, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6570,17 +6236,16 @@ define void @v_shuffle_v4i64_v3i64__5_5_2_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, v8
+; GFX90A-NEXT: v_mov_b32_e32 v7, v9
+; GFX90A-NEXT: v_mov_b32_e32 v12, v10
+; GFX90A-NEXT: v_mov_b32_e32 v13, v11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v5
-; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v10
-; GFX90A-NEXT: v_mov_b32_e32 v9, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6590,18 +6255,16 @@ define void @v_shuffle_v4i64_v3i64__5_5_2_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v8
+; GFX942-NEXT: v_mov_b32_e32 v7, v9
+; GFX942-NEXT: v_mov_b32_e32 v12, v10
+; GFX942-NEXT: v_mov_b32_e32 v13, v11
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v5
-; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v8, v10
-; GFX942-NEXT: v_mov_b32_e32 v9, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -6615,45 +6278,42 @@ define void @v_shuffle_v4i64_v3i64__5_5_3_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_3_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_3_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_3_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -6667,42 +6327,43 @@ define void @v_shuffle_v4i64_v3i64__u_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v3i64__u_5_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v3i64__u_5_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v3i64__u_5_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -6717,18 +6378,18 @@ define void @v_shuffle_v4i64_v3i64__0_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[8:13]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:7]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: v_mov_b32_e32 v10, v4
+; GFX900-NEXT: v_mov_b32_e32 v11, v5
+; GFX900-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6736,18 +6397,18 @@ define void @v_shuffle_v4i64_v3i64__0_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[8:13]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:7]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: v_mov_b32_e32 v10, v4
+; GFX90A-NEXT: v_mov_b32_e32 v11, v5
+; GFX90A-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6755,19 +6416,19 @@ define void @v_shuffle_v4i64_v3i64__0_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[8:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:7]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: v_mov_b32_e32 v10, v4
+; GFX942-NEXT: v_mov_b32_e32 v11, v5
+; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -6782,18 +6443,18 @@ define void @v_shuffle_v4i64_v3i64__1_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:9]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v8
-; GFX900-NEXT: v_mov_b32_e32 v7, v9
-; GFX900-NEXT: v_mov_b32_e32 v4, v8
-; GFX900-NEXT: v_mov_b32_e32 v5, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: v_mov_b32_e32 v10, v4
+; GFX900-NEXT: v_mov_b32_e32 v11, v5
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6801,18 +6462,18 @@ define void @v_shuffle_v4i64_v3i64__1_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:9]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v8
-; GFX90A-NEXT: v_mov_b32_e32 v7, v9
-; GFX90A-NEXT: v_mov_b32_e32 v4, v8
-; GFX90A-NEXT: v_mov_b32_e32 v5, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: v_mov_b32_e32 v10, v4
+; GFX90A-NEXT: v_mov_b32_e32 v11, v5
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6820,19 +6481,19 @@ define void @v_shuffle_v4i64_v3i64__1_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:9]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v8
-; GFX942-NEXT: v_mov_b32_e32 v7, v9
-; GFX942-NEXT: v_mov_b32_e32 v4, v8
-; GFX942-NEXT: v_mov_b32_e32 v5, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: v_mov_b32_e32 v10, v4
+; GFX942-NEXT: v_mov_b32_e32 v11, v5
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -6849,17 +6510,16 @@ define void @v_shuffle_v4i64_v3i64__2_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v8, v10
-; GFX900-NEXT: v_mov_b32_e32 v9, v11
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
+; GFX900-NEXT: v_mov_b32_e32 v12, v10
+; GFX900-NEXT: v_mov_b32_e32 v13, v11
+; GFX900-NEXT: v_mov_b32_e32 v6, v10
+; GFX900-NEXT: v_mov_b32_e32 v7, v11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v8, v4
-; GFX900-NEXT: v_mov_b32_e32 v9, v5
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6869,17 +6529,16 @@ define void @v_shuffle_v4i64_v3i64__2_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v10
-; GFX90A-NEXT: v_mov_b32_e32 v9, v11
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
+; GFX90A-NEXT: v_mov_b32_e32 v12, v10
+; GFX90A-NEXT: v_mov_b32_e32 v13, v11
+; GFX90A-NEXT: v_mov_b32_e32 v6, v10
+; GFX90A-NEXT: v_mov_b32_e32 v7, v11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v4
-; GFX90A-NEXT: v_mov_b32_e32 v9, v5
-; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6889,17 +6548,16 @@ define void @v_shuffle_v4i64_v3i64__2_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v10
-; GFX942-NEXT: v_mov_b32_e32 v9, v11
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, v10
+; GFX942-NEXT: v_mov_b32_e32 v13, v11
+; GFX942-NEXT: v_mov_b32_e32 v6, v10
+; GFX942-NEXT: v_mov_b32_e32 v7, v11
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v8, v4
-; GFX942-NEXT: v_mov_b32_e32 v9, v5
-; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -6916,11 +6574,13 @@ define void @v_shuffle_v4i64_v3i64__3_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
; GFX900-NEXT: v_mov_b32_e32 v2, v4
; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6930,11 +6590,13 @@ define void @v_shuffle_v4i64_v3i64__3_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
; GFX90A-NEXT: v_mov_b32_e32 v2, v4
; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6944,11 +6606,13 @@ define void @v_shuffle_v4i64_v3i64__3_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
; GFX942-NEXT: v_mov_b32_e32 v2, v4
; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -6962,49 +6626,43 @@ define void @v_shuffle_v4i64_v3i64__4_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v3i64__4_5_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v6, v4
; GFX900-NEXT: v_mov_b32_e32 v7, v5
-; GFX900-NEXT: v_mov_b32_e32 v8, v4
-; GFX900-NEXT: v_mov_b32_e32 v9, v5
-; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v3i64__4_5_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, v4
-; GFX90A-NEXT: v_mov_b32_e32 v9, v5
-; GFX90A-NEXT: v_mov_b32_e32 v10, v4
-; GFX90A-NEXT: v_mov_b32_e32 v11, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[8:11], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v3i64__4_5_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v4
-; GFX942-NEXT: v_mov_b32_e32 v9, v5
-; GFX942-NEXT: v_mov_b32_e32 v10, v4
-; GFX942-NEXT: v_mov_b32_e32 v11, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[8:11], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -7018,48 +6676,43 @@ define void @v_shuffle_v4i64_v3i64__5_u_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_u_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_u_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_u_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -7073,64 +6726,61 @@ define void @v_shuffle_v4i64_v3i64__5_0_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_0_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:7]
+; GFX900-NEXT: ; def v[8:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v8
+; GFX900-NEXT: v_mov_b32_e32 v7, v9
+; GFX900-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_0_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:7]
+; GFX90A-NEXT: ; def v[8:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, v8
+; GFX90A-NEXT: v_mov_b32_e32 v7, v9
+; GFX90A-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_0_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:7]
+; GFX942-NEXT: ; def v[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v6, v8
+; GFX942-NEXT: v_mov_b32_e32 v7, v9
+; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -7145,18 +6795,19 @@ define void @v_shuffle_v4i64_v3i64__5_1_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:9]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
; GFX900-NEXT: v_mov_b32_e32 v6, v8
; GFX900-NEXT: v_mov_b32_e32 v7, v9
-; GFX900-NEXT: v_mov_b32_e32 v0, v8
-; GFX900-NEXT: v_mov_b32_e32 v1, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7164,18 +6815,19 @@ define void @v_shuffle_v4i64_v3i64__5_1_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:9]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
; GFX90A-NEXT: v_mov_b32_e32 v6, v8
; GFX90A-NEXT: v_mov_b32_e32 v7, v9
-; GFX90A-NEXT: v_mov_b32_e32 v0, v8
-; GFX90A-NEXT: v_mov_b32_e32 v1, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7183,19 +6835,20 @@ define void @v_shuffle_v4i64_v3i64__5_1_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:9]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_mov_b32_e32 v6, v8
; GFX942-NEXT: v_mov_b32_e32 v7, v9
-; GFX942-NEXT: v_mov_b32_e32 v0, v8
-; GFX942-NEXT: v_mov_b32_e32 v1, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -7210,18 +6863,19 @@ define void @v_shuffle_v4i64_v3i64__5_2_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[4:9]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:11]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v8, v10
-; GFX900-NEXT: v_mov_b32_e32 v9, v11
-; GFX900-NEXT: v_mov_b32_e32 v2, v10
-; GFX900-NEXT: v_mov_b32_e32 v3, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v8
+; GFX900-NEXT: v_mov_b32_e32 v7, v9
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7229,18 +6883,19 @@ define void @v_shuffle_v4i64_v3i64__5_2_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[4:9]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:11]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v10
-; GFX90A-NEXT: v_mov_b32_e32 v9, v11
-; GFX90A-NEXT: v_mov_b32_e32 v2, v10
-; GFX90A-NEXT: v_mov_b32_e32 v3, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, v8
+; GFX90A-NEXT: v_mov_b32_e32 v7, v9
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7248,18 +6903,20 @@ define void @v_shuffle_v4i64_v3i64__5_2_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:11]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v10
-; GFX942-NEXT: v_mov_b32_e32 v9, v11
-; GFX942-NEXT: v_mov_b32_e32 v2, v10
-; GFX942-NEXT: v_mov_b32_e32 v3, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v6, v8
+; GFX942-NEXT: v_mov_b32_e32 v7, v9
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -7273,51 +6930,52 @@ define void @v_shuffle_v4i64_v3i64__5_3_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_3_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: v_mov_b32_e32 v7, v1
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_3_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
-; GFX90A-NEXT: s_waitcnt vmcnt(0)
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v0
+; GFX90A-NEXT: v_mov_b32_e32 v7, v1
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_3_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, v0
+; GFX942-NEXT: v_mov_b32_e32 v7, v1
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -7331,54 +6989,52 @@ define void @v_shuffle_v4i64_v3i64__5_4_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_4_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: v_mov_b32_e32 v6, v4
; GFX900-NEXT: v_mov_b32_e32 v7, v5
-; GFX900-NEXT: v_mov_b32_e32 v8, v4
-; GFX900-NEXT: v_mov_b32_e32 v9, v5
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: v_mov_b32_e32 v7, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_4_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v4
-; GFX90A-NEXT: v_mov_b32_e32 v9, v5
-; GFX90A-NEXT: v_mov_b32_e32 v10, v4
-; GFX90A-NEXT: v_mov_b32_e32 v11, v5
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[8:11], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, v2
+; GFX90A-NEXT: v_mov_b32_e32 v7, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_4_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v4
-; GFX942-NEXT: v_mov_b32_e32 v9, v5
-; GFX942-NEXT: v_mov_b32_e32 v10, v4
-; GFX942-NEXT: v_mov_b32_e32 v11, v5
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[8:11], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v6, v2
+; GFX942-NEXT: v_mov_b32_e32 v7, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -7392,45 +7048,42 @@ define void @v_shuffle_v4i64_v3i64__5_5_u_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_u_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_u_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_u_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -7445,18 +7098,19 @@ define void @v_shuffle_v4i64_v3i64__5_5_0_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:7]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v4
+; GFX900-NEXT: v_mov_b32_e32 v9, v5
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7464,18 +7118,19 @@ define void @v_shuffle_v4i64_v3i64__5_5_0_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:7]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v4
+; GFX90A-NEXT: v_mov_b32_e32 v9, v5
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7483,19 +7138,20 @@ define void @v_shuffle_v4i64_v3i64__5_5_0_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:7]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v4
+; GFX942-NEXT: v_mov_b32_e32 v9, v5
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -7510,18 +7166,19 @@ define void @v_shuffle_v4i64_v3i64__5_5_1_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v8
-; GFX900-NEXT: v_mov_b32_e32 v5, v9
-; GFX900-NEXT: v_mov_b32_e32 v6, v8
-; GFX900-NEXT: v_mov_b32_e32 v7, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, v4
+; GFX900-NEXT: v_mov_b32_e32 v9, v5
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7529,18 +7186,19 @@ define void @v_shuffle_v4i64_v3i64__5_5_1_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v8
-; GFX90A-NEXT: v_mov_b32_e32 v5, v9
-; GFX90A-NEXT: v_mov_b32_e32 v6, v8
-; GFX90A-NEXT: v_mov_b32_e32 v7, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, v4
+; GFX90A-NEXT: v_mov_b32_e32 v9, v5
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7548,19 +7206,20 @@ define void @v_shuffle_v4i64_v3i64__5_5_1_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:9]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v8
-; GFX942-NEXT: v_mov_b32_e32 v5, v9
-; GFX942-NEXT: v_mov_b32_e32 v6, v8
-; GFX942-NEXT: v_mov_b32_e32 v7, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v4
+; GFX942-NEXT: v_mov_b32_e32 v9, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -7577,17 +7236,16 @@ define void @v_shuffle_v4i64_v3i64__5_5_2_5(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v10
+; GFX900-NEXT: v_mov_b32_e32 v7, v11
+; GFX900-NEXT: v_mov_b32_e32 v12, v10
+; GFX900-NEXT: v_mov_b32_e32 v13, v11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, v4
-; GFX900-NEXT: v_mov_b32_e32 v9, v5
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v8, v10
-; GFX900-NEXT: v_mov_b32_e32 v9, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7597,17 +7255,16 @@ define void @v_shuffle_v4i64_v3i64__5_5_2_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, v10
+; GFX90A-NEXT: v_mov_b32_e32 v7, v11
+; GFX90A-NEXT: v_mov_b32_e32 v12, v10
+; GFX90A-NEXT: v_mov_b32_e32 v13, v11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, v4
-; GFX90A-NEXT: v_mov_b32_e32 v9, v5
-; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v10
-; GFX90A-NEXT: v_mov_b32_e32 v9, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7617,18 +7274,16 @@ define void @v_shuffle_v4i64_v3i64__5_5_2_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v10
+; GFX942-NEXT: v_mov_b32_e32 v7, v11
+; GFX942-NEXT: v_mov_b32_e32 v12, v10
+; GFX942-NEXT: v_mov_b32_e32 v13, v11
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v4
-; GFX942-NEXT: v_mov_b32_e32 v9, v5
-; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v8, v10
-; GFX942-NEXT: v_mov_b32_e32 v9, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -7645,11 +7300,13 @@ define void @v_shuffle_v4i64_v3i64__5_5_3_5(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v4
; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7659,11 +7316,13 @@ define void @v_shuffle_v4i64_v3i64__5_5_3_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v4
; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7673,11 +7332,13 @@ define void @v_shuffle_v4i64_v3i64__5_5_3_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v4
; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -7691,45 +7352,42 @@ define void @v_shuffle_v4i64_v3i64__5_5_4_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_4_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_4_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_4_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=v"()
@@ -7865,10 +7523,9 @@ define void @s_shuffle_v4i64_v3i64__2_u_u_u() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -7966,10 +7623,9 @@ define void @s_shuffle_v4i64_v3i64__5_u_u_u() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -7986,15 +7642,13 @@ define void @s_shuffle_v4i64_v3i64__5_0_u_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -8004,15 +7658,13 @@ define void @s_shuffle_v4i64_v3i64__5_0_u_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -8075,11 +7727,11 @@ define void @s_shuffle_v4i64_v3i64__5_1_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8130,13 +7782,11 @@ define void @s_shuffle_v4i64_v3i64__5_2_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s10, s12
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b32 s11, s13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8149,46 +7799,18 @@ define void @s_shuffle_v4i64_v3i64__5_2_u_u() {
}
define void @s_shuffle_v4i64_v3i64__5_3_u_u() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_3_u_u:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_3_u_u:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_3_u_u:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_3_u_u:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s4
+; GFX9-NEXT: s_mov_b32 s11, s5
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 3, i32 poison, i32 poison>
@@ -8201,10 +7823,10 @@ define void @s_shuffle_v4i64_v3i64__5_4_u_u() {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
+; GFX9-NEXT: ; def s[4:9]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
+; GFX9-NEXT: s_mov_b32 s10, s6
+; GFX9-NEXT: s_mov_b32 s11, s7
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:15]
; GFX9-NEXT: ;;#ASMEND
@@ -8217,50 +7839,18 @@ define void @s_shuffle_v4i64_v3i64__5_4_u_u() {
}
define void @s_shuffle_v4i64_v3i64__5_5_u_u() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_u_u:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_u_u:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_u_u:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_5_u_u:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 poison>
@@ -8269,65 +7859,21 @@ define void @s_shuffle_v4i64_v3i64__5_5_u_u() {
}
define void @s_shuffle_v4i64_v3i64__5_5_0_u() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_0_u:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_0_u:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_0_u:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_5_0_u:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 poison>
@@ -8340,17 +7886,15 @@ define void @s_shuffle_v4i64_v3i64__5_5_1_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -8360,17 +7904,15 @@ define void @s_shuffle_v4i64_v3i64__5_5_1_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -8380,16 +7922,14 @@ define void @s_shuffle_v4i64_v3i64__5_5_1_u() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:9]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s10, s8
+; GFX942-NEXT: s_mov_b32 s11, s9
; GFX942-NEXT: s_mov_b32 s13, s3
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
@@ -8410,12 +7950,10 @@ define void @s_shuffle_v4i64_v3i64__5_5_2_u() {
; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:21]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s20
-; GFX900-NEXT: s_mov_b32 s9, s21
-; GFX900-NEXT: s_mov_b32 s10, s20
-; GFX900-NEXT: s_mov_b32 s11, s21
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -8428,12 +7966,10 @@ define void @s_shuffle_v4i64_v3i64__5_5_2_u() {
; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:21]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s20
-; GFX90A-NEXT: s_mov_b32 s9, s21
-; GFX90A-NEXT: s_mov_b32 s10, s20
-; GFX90A-NEXT: s_mov_b32 s11, s21
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -8445,13 +7981,12 @@ define void @s_shuffle_v4i64_v3i64__5_5_2_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b32 s10, s8
+; GFX942-NEXT: s_mov_b32 s11, s9
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8464,52 +7999,20 @@ define void @s_shuffle_v4i64_v3i64__5_5_2_u() {
}
define void @s_shuffle_v4i64_v3i64__5_5_3_u() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_3_u:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_3_u:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_3_u:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_5_3_u:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s12, s4
+; GFX9-NEXT: s_mov_b32 s13, s5
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 poison>
@@ -8518,56 +8021,20 @@ define void @s_shuffle_v4i64_v3i64__5_5_3_u() {
}
define void @s_shuffle_v4i64_v3i64__5_5_4_u() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_4_u:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_4_u:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_4_u:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_5_4_u:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s12, s6
+; GFX9-NEXT: s_mov_b32 s13, s7
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 poison>
@@ -8580,12 +8047,12 @@ define void @s_shuffle_v4i64_v3i64__5_5_5_u() {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
+; GFX9-NEXT: ; def s[4:9]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s12, s8
+; GFX9-NEXT: s_mov_b32 s13, s9
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:15]
; GFX9-NEXT: ;;#ASMEND
@@ -8602,17 +8069,17 @@ define void @s_shuffle_v4i64_v3i64__5_5_5_0() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[16:21]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: s_mov_b32 s12, s8
+; GFX900-NEXT: s_mov_b32 s13, s9
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -8622,17 +8089,17 @@ define void @s_shuffle_v4i64_v3i64__5_5_5_0() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[16:21]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: s_mov_b32 s12, s8
+; GFX90A-NEXT: s_mov_b32 s13, s9
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -8642,16 +8109,16 @@ define void @s_shuffle_v4i64_v3i64__5_5_5_0() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
; GFX942-NEXT: s_mov_b32 s14, s0
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:9]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s10, s8
+; GFX942-NEXT: s_mov_b32 s11, s9
+; GFX942-NEXT: s_mov_b32 s12, s8
+; GFX942-NEXT: s_mov_b32 s13, s9
; GFX942-NEXT: s_mov_b32 s15, s1
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
@@ -8665,65 +8132,23 @@ define void @s_shuffle_v4i64_v3i64__5_5_5_0() {
}
define void @s_shuffle_v4i64_v3i64__5_5_5_1() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_5_1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_5_1:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_5_1:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_5_5_1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s12, s8
+; GFX9-NEXT: s_mov_b32 s13, s9
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 1>
@@ -8732,65 +8157,25 @@ define void @s_shuffle_v4i64_v3i64__5_5_5_1() {
}
define void @s_shuffle_v4i64_v3i64__5_5_5_2() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_5_2:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_5_2:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_5_2:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_5_5_2:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s12, s8
+; GFX9-NEXT: s_mov_b32 s13, s9
+; GFX9-NEXT: s_mov_b32 s14, s16
+; GFX9-NEXT: s_mov_b32 s15, s17
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 2>
@@ -8799,62 +8184,22 @@ define void @s_shuffle_v4i64_v3i64__5_5_5_2() {
}
define void @s_shuffle_v4i64_v3i64__5_5_5_3() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_5_3:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:21]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s20
-; GFX900-NEXT: s_mov_b32 s9, s21
-; GFX900-NEXT: s_mov_b32 s10, s20
-; GFX900-NEXT: s_mov_b32 s11, s21
-; GFX900-NEXT: s_mov_b32 s12, s20
-; GFX900-NEXT: s_mov_b32 s13, s21
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_5_3:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:21]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s20
-; GFX90A-NEXT: s_mov_b32 s9, s21
-; GFX90A-NEXT: s_mov_b32 s10, s20
-; GFX90A-NEXT: s_mov_b32 s11, s21
-; GFX90A-NEXT: s_mov_b32 s12, s20
-; GFX90A-NEXT: s_mov_b32 s13, s21
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_5_3:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_5_5_3:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s12, s8
+; GFX9-NEXT: s_mov_b32 s13, s9
+; GFX9-NEXT: s_mov_b32 s14, s4
+; GFX9-NEXT: s_mov_b32 s15, s5
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 3>
@@ -8863,58 +8208,22 @@ define void @s_shuffle_v4i64_v3i64__5_5_5_3() {
}
define void @s_shuffle_v4i64_v3i64__5_5_5_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_5_4:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_5_4:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_5_4:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_5_5_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s12, s8
+; GFX9-NEXT: s_mov_b32 s13, s9
+; GFX9-NEXT: s_mov_b32 s14, s6
+; GFX9-NEXT: s_mov_b32 s15, s7
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 4>
@@ -8927,14 +8236,14 @@ define void @s_shuffle_v4i64_v3i64__5_5_5_5() {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
+; GFX9-NEXT: ; def s[4:9]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s12, s8
+; GFX9-NEXT: s_mov_b32 s13, s9
+; GFX9-NEXT: s_mov_b32 s14, s8
+; GFX9-NEXT: s_mov_b32 s15, s9
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:15]
; GFX9-NEXT: ;;#ASMEND
@@ -8947,56 +8256,20 @@ define void @s_shuffle_v4i64_v3i64__5_5_5_5() {
}
define void @s_shuffle_v4i64_v3i64__u_0_0_0() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__u_0_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__u_0_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__u_0_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__u_0_0_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: s_mov_b32 s14, s12
+; GFX9-NEXT: s_mov_b32 s15, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 poison, i32 0, i32 0, i32 0>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
@@ -9027,172 +8300,427 @@ define void @s_shuffle_v4i64_v3i64__0_0_0_0() {
}
define void @s_shuffle_v4i64_v3i64__1_0_0_0() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__1_0_0_0:
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__1_0_0_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s14
+; GFX9-NEXT: s_mov_b32 s9, s15
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: s_mov_b32 s14, s12
+; GFX9-NEXT: s_mov_b32 s15, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v3i64__2_0_0_0() {
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__2_0_0_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s4
+; GFX9-NEXT: s_mov_b32 s11, s5
+; GFX9-NEXT: s_mov_b32 s12, s4
+; GFX9-NEXT: s_mov_b32 s13, s5
+; GFX9-NEXT: s_mov_b32 s14, s4
+; GFX9-NEXT: s_mov_b32 s15, s5
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 2, i32 0, i32 0, i32 0>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v3i64__3_0_0_0() {
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__3_0_0_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: s_mov_b32 s14, s12
+; GFX9-NEXT: s_mov_b32 s15, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 3, i32 0, i32 0, i32 0>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v3i64__4_0_0_0() {
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__4_0_0_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:17]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s6
; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__1_0_0_0:
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__4_0_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:17]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s6
; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__1_0_0_0:
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__4_0_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:17]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_mov_b32 s8, s2
; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: s_mov_b32 s10, s12
+; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b32 s14, s12
+; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
+ %vec1 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 4, i32 0, i32 0, i32 0>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v3i64__2_0_0_0() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__2_0_0_0:
+define void @s_shuffle_v4i64_v3i64__5_0_0_0() {
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_0_0_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: s_mov_b32 s14, s12
+; GFX9-NEXT: s_mov_b32 s15, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %vec1 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 0, i32 0, i32 0>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v3i64__5_u_0_0() {
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_u_0_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s14, s12
+; GFX9-NEXT: s_mov_b32 s15, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %vec1 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 poison, i32 0, i32 0>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v3i64__5_1_0_0() {
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_1_0_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s14
+; GFX9-NEXT: s_mov_b32 s11, s15
+; GFX9-NEXT: s_mov_b32 s14, s12
+; GFX9-NEXT: s_mov_b32 s15, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %vec1 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 1, i32 0, i32 0>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v3i64__5_2_0_0() {
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_2_0_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s16
+; GFX9-NEXT: s_mov_b32 s11, s17
+; GFX9-NEXT: s_mov_b32 s14, s12
+; GFX9-NEXT: s_mov_b32 s15, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %vec1 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 2, i32 0, i32 0>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v3i64__5_3_0_0() {
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_3_0_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s4
+; GFX9-NEXT: s_mov_b32 s11, s5
+; GFX9-NEXT: s_mov_b32 s14, s12
+; GFX9-NEXT: s_mov_b32 s15, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %vec1 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 3, i32 0, i32 0>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v3i64__5_4_0_0() {
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_4_0_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s6
+; GFX9-NEXT: s_mov_b32 s11, s7
+; GFX9-NEXT: s_mov_b32 s14, s12
+; GFX9-NEXT: s_mov_b32 s15, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %vec1 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 4, i32 0, i32 0>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v3i64__5_5_0_0() {
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_5_0_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s14, s12
+; GFX9-NEXT: s_mov_b32 s15, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %vec1 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 0>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v3i64__5_5_u_0() {
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_u_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:17]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__2_0_0_0:
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_u_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:17]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__2_0_0_0:
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_u_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
; GFX942-NEXT: s_mov_b32 s14, s0
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:9]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s10, s8
+; GFX942-NEXT: s_mov_b32 s11, s9
; GFX942-NEXT: s_mov_b32 s15, s1
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 2, i32 0, i32 0, i32 0>
+ %vec1 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 0>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v3i64__3_0_0_0() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__3_0_0_0:
+define void @s_shuffle_v4i64_v3i64__5_5_1_0() {
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_1_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[16:21]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: s_mov_b32 s12, s18
+; GFX900-NEXT: s_mov_b32 s13, s19
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__3_0_0_0:
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_1_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[16:21]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: s_mov_b32 s12, s18
+; GFX90A-NEXT: s_mov_b32 s13, s19
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__3_0_0_0:
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_1_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:9]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s10, s8
+; GFX942-NEXT: s_mov_b32 s11, s9
+; GFX942-NEXT: s_mov_b32 s13, s3
; GFX942-NEXT: s_mov_b32 s14, s0
; GFX942-NEXT: s_mov_b32 s15, s1
; GFX942-NEXT: ;;#ASMSTART
@@ -9200,72 +8728,94 @@ define void @s_shuffle_v4i64_v3i64__3_0_0_0() {
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 3, i32 0, i32 0, i32 0>
+ %vec1 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 0>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v3i64__4_0_0_0() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__4_0_0_0:
+define void @s_shuffle_v4i64_v3i64__5_5_2_0() {
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_5_2_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[16:21]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s12, s20
+; GFX9-NEXT: s_mov_b32 s13, s21
+; GFX9-NEXT: s_mov_b32 s14, s16
+; GFX9-NEXT: s_mov_b32 s15, s17
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %vec1 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 0>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v3i64__5_5_3_0() {
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_3_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[16:21]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
; GFX900-NEXT: s_mov_b32 s12, s4
; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__4_0_0_0:
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_3_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[16:21]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
; GFX90A-NEXT: s_mov_b32 s12, s4
; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__4_0_0_0:
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_3_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b32 s14, s0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
+; GFX942-NEXT: s_mov_b32 s10, s8
+; GFX942-NEXT: s_mov_b32 s11, s9
+; GFX942-NEXT: s_mov_b32 s12, s4
+; GFX942-NEXT: s_mov_b32 s13, s5
; GFX942-NEXT: s_mov_b32 s15, s1
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
@@ -9273,70 +8823,66 @@ define void @s_shuffle_v4i64_v3i64__4_0_0_0() {
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 4, i32 0, i32 0, i32 0>
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 0>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v3i64__5_0_0_0() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_0_0_0:
+define void @s_shuffle_v4i64_v3i64__5_5_4_0() {
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_4_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[16:21]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_0_0_0:
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_4_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[16:21]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_0_0_0:
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_4_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b32 s14, s0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
+; GFX942-NEXT: s_mov_b32 s10, s8
+; GFX942-NEXT: s_mov_b32 s11, s9
+; GFX942-NEXT: s_mov_b32 s12, s6
+; GFX942-NEXT: s_mov_b32 s13, s7
; GFX942-NEXT: s_mov_b32 s15, s1
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
@@ -9344,3422 +8890,640 @@ define void @s_shuffle_v4i64_v3i64__5_0_0_0() {
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 0, i32 0, i32 0>
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 0>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v3i64__5_u_0_0() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_u_0_0:
+define void @s_shuffle_v4i64_v3i64__u_1_1_1() {
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__u_1_1_1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: s_mov_b32 s14, s10
+; GFX9-NEXT: s_mov_b32 s15, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 poison, i32 1, i32 1, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v3i64__0_1_1_1() {
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__0_1_1_1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: s_mov_b32 s14, s10
+; GFX9-NEXT: s_mov_b32 s15, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v3i64__1_1_1_1() {
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__1_1_1_1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: s_mov_b32 s14, s10
+; GFX9-NEXT: s_mov_b32 s15, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v3i64__2_1_1_1() {
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__2_1_1_1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s6
+; GFX9-NEXT: s_mov_b32 s11, s7
+; GFX9-NEXT: s_mov_b32 s12, s6
+; GFX9-NEXT: s_mov_b32 s13, s7
+; GFX9-NEXT: s_mov_b32 s14, s6
+; GFX9-NEXT: s_mov_b32 s15, s7
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 2, i32 1, i32 1, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v3i64__3_1_1_1() {
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__3_1_1_1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: s_mov_b32 s14, s10
+; GFX9-NEXT: s_mov_b32 s15, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 3, i32 1, i32 1, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v3i64__4_1_1_1() {
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__4_1_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_u_0_0:
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__4_1_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_u_0_0:
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__4_1_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:9]
+; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b32 s8, s2
+; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b32 s12, s10
+; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b32 s14, s10
+; GFX942-NEXT: s_mov_b32 s15, s11
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 poison, i32 0, i32 0>
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 4, i32 1, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v3i64__5_1_0_0() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_1_0_0:
+define void @s_shuffle_v4i64_v3i64__5_1_1_1() {
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_1_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s6
-; GFX900-NEXT: s_mov_b32 s11, s7
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_1_0_0:
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_1_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s6
-; GFX90A-NEXT: s_mov_b32 s11, s7
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_1_0_0:
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_1_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s2
+; GFX942-NEXT: s_mov_b32 s12, s10
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b32 s14, s10
+; GFX942-NEXT: s_mov_b32 s15, s11
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 1, i32 0, i32 0>
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 1, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v3i64__5_2_0_0() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_2_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:21]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s20
-; GFX900-NEXT: s_mov_b32 s11, s21
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_2_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:21]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s20
-; GFX90A-NEXT: s_mov_b32 s11, s21
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_2_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+define void @s_shuffle_v4i64_v3i64__5_u_1_1() {
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_u_1_1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s12, s14
+; GFX9-NEXT: s_mov_b32 s13, s15
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 2, i32 0, i32 0>
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 poison, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v3i64__5_3_0_0() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_3_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_3_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_3_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:9]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+define void @s_shuffle_v4i64_v3i64__5_0_1_1() {
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_0_1_1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: s_mov_b32 s12, s14
+; GFX9-NEXT: s_mov_b32 s13, s15
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 3, i32 0, i32 0>
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 0, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v3i64__5_4_0_0() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_4_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_4_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_4_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+define void @s_shuffle_v4i64_v3i64__5_2_1_1() {
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_2_1_1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s16
+; GFX9-NEXT: s_mov_b32 s11, s17
+; GFX9-NEXT: s_mov_b32 s12, s14
+; GFX9-NEXT: s_mov_b32 s13, s15
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 4, i32 0, i32 0>
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 2, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v3i64__5_5_0_0() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+define void @s_shuffle_v4i64_v3i64__5_3_1_1() {
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_3_1_1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s4
+; GFX9-NEXT: s_mov_b32 s11, s5
+; GFX9-NEXT: s_mov_b32 s12, s14
+; GFX9-NEXT: s_mov_b32 s13, s15
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 0>
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 3, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v3i64__5_5_u_0() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_u_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_u_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_u_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+define void @s_shuffle_v4i64_v3i64__5_4_1_1() {
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_4_1_1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s6
+; GFX9-NEXT: s_mov_b32 s11, s7
+; GFX9-NEXT: s_mov_b32 s12, s14
+; GFX9-NEXT: s_mov_b32 s13, s15
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 0>
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 4, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v3i64__5_5_1_0() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_1_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_1_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_1_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+define void @s_shuffle_v4i64_v3i64__5_5_1_1() {
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_5_1_1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s12, s14
+; GFX9-NEXT: s_mov_b32 s13, s15
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 0>
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v3i64__5_5_2_0() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_2_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:21]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s20
-; GFX900-NEXT: s_mov_b32 s13, s21
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_2_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:21]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s20
-; GFX90A-NEXT: s_mov_b32 s13, s21
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_2_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+define void @s_shuffle_v4i64_v3i64__5_5_u_1() {
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_5_u_1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 0>
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v3i64__5_5_3_0() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_3_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_3_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_3_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:17]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+define void @s_shuffle_v4i64_v3i64__5_5_0_1() {
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_5_0_1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 0>
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v3i64__5_5_4_0() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_4_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_4_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_4_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:17]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+define void @s_shuffle_v4i64_v3i64__5_5_2_1() {
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_5_2_1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s12, s16
+; GFX9-NEXT: s_mov_b32 s13, s17
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 0>
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v3i64__u_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__u_1_1_1:
+define void @s_shuffle_v4i64_v3i64__5_5_3_1() {
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_5_3_1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
+; GFX9-NEXT: ; def s[12:17]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s12, s4
+; GFX9-NEXT: s_mov_b32 s13, s5
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:15]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 poison, i32 1, i32 1, i32 1>
+ %vec1 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v3i64__0_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__0_1_1_1:
+define void @s_shuffle_v4i64_v3i64__5_5_4_1() {
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_5_4_1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
+; GFX9-NEXT: ; def s[12:17]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s12, s6
+; GFX9-NEXT: s_mov_b32 s13, s7
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:15]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
+ %vec1 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v3i64__1_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__1_1_1_1:
+define void @s_shuffle_v4i64_v3i64__u_2_2_2() {
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__u_2_2_2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; def s[8:13]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: s_mov_b32 s14, s12
+; GFX9-NEXT: s_mov_b32 s15, s13
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:15]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 poison, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v3i64__2_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__2_1_1_1:
+define void @s_shuffle_v4i64_v3i64__0_2_2_2() {
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__0_2_2_2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; def s[8:13]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: s_mov_b32 s14, s12
+; GFX9-NEXT: s_mov_b32 s15, s13
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:15]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 2, i32 1, i32 1, i32 1>
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 0, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v3i64__3_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__3_1_1_1:
+define void @s_shuffle_v4i64_v3i64__1_2_2_2() {
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__1_2_2_2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; def s[8:13]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: s_mov_b32 s14, s12
+; GFX9-NEXT: s_mov_b32 s15, s13
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:15]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 3, i32 1, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v3i64__4_1_1_1() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__4_1_1_1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s12, s10
-; GFX900-NEXT: s_mov_b32 s13, s11
-; GFX900-NEXT: s_mov_b32 s14, s10
-; GFX900-NEXT: s_mov_b32 s15, s11
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__4_1_1_1:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s12, s10
-; GFX90A-NEXT: s_mov_b32 s13, s11
-; GFX90A-NEXT: s_mov_b32 s14, s10
-; GFX90A-NEXT: s_mov_b32 s15, s11
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__4_1_1_1:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 4, i32 1, i32 1, i32 1>
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 1, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v3i64__5_1_1_1() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_1_1_1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s12, s10
-; GFX900-NEXT: s_mov_b32 s13, s11
-; GFX900-NEXT: s_mov_b32 s14, s10
-; GFX900-NEXT: s_mov_b32 s15, s11
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_1_1_1:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s12, s10
-; GFX90A-NEXT: s_mov_b32 s13, s11
-; GFX90A-NEXT: s_mov_b32 s14, s10
-; GFX90A-NEXT: s_mov_b32 s15, s11
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_1_1_1:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 1, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v3i64__5_u_1_1() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_u_1_1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_u_1_1:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_u_1_1:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:9]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 poison, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v3i64__5_0_1_1() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_0_1_1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_0_1_1:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_0_1_1:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:9]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 0, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v3i64__5_2_1_1() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_2_1_1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_2_1_1:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_2_1_1:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 2, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v3i64__5_3_1_1() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_3_1_1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_3_1_1:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_3_1_1:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:9]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 3, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v3i64__5_4_1_1() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_4_1_1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_4_1_1:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_4_1_1:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 4, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v3i64__5_5_1_1() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_1_1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_1_1:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_1_1:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v3i64__5_5_u_1() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_u_1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_u_1:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_u_1:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v3i64__5_5_0_1() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_0_1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_0_1:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_0_1:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v3i64__5_5_2_1() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_2_1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_2_1:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_2_1:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v3i64__5_5_3_1() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_3_1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_3_1:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_3_1:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:17]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v3i64__5_5_4_1() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_4_1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_4_1:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_4_1:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:17]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v3i64__u_2_2_2() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__u_2_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 poison, i32 2, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v3i64__0_2_2_2() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__0_2_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 0, i32 2, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v3i64__1_2_2_2() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__1_2_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 1, i32 2, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v3i64__2_2_2_2() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__2_2_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v3i64__3_2_2_2() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__3_2_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 3, i32 2, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v3i64__4_2_2_2() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__4_2_2_2:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s14, s12
-; GFX900-NEXT: s_mov_b32 s15, s13
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__4_2_2_2:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s14, s12
-; GFX90A-NEXT: s_mov_b32 s15, s13
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__4_2_2_2:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 4, i32 2, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v3i64__5_2_2_2() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_2_2_2:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s14, s12
-; GFX900-NEXT: s_mov_b32 s15, s13
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_2_2_2:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s14, s12
-; GFX90A-NEXT: s_mov_b32 s15, s13
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_2_2_2:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 2, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v3i64__5_u_2_2() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_u_2_2:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s14, s12
-; GFX900-NEXT: s_mov_b32 s15, s13
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_u_2_2:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s14, s12
-; GFX90A-NEXT: s_mov_b32 s15, s13
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_u_2_2:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 poison, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v3i64__5_0_2_2() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_0_2_2:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_0_2_2:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_0_2_2:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 0, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v3i64__5_1_2_2() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_1_2_2:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s14, s12
-; GFX900-NEXT: s_mov_b32 s15, s13
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_1_2_2:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s14, s12
-; GFX90A-NEXT: s_mov_b32 s15, s13
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_1_2_2:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 1, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v3i64__5_3_2_2() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_3_2_2:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s14, s12
-; GFX900-NEXT: s_mov_b32 s15, s13
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_3_2_2:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s14, s12
-; GFX90A-NEXT: s_mov_b32 s15, s13
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_3_2_2:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 3, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v3i64__5_4_2_2() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_4_2_2:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_4_2_2:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_4_2_2:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 4, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v3i64__5_5_2_2() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_2_2:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:21]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s20
-; GFX900-NEXT: s_mov_b32 s9, s21
-; GFX900-NEXT: s_mov_b32 s10, s20
-; GFX900-NEXT: s_mov_b32 s11, s21
-; GFX900-NEXT: s_mov_b32 s14, s12
-; GFX900-NEXT: s_mov_b32 s15, s13
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_2_2:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:21]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s20
-; GFX90A-NEXT: s_mov_b32 s9, s21
-; GFX90A-NEXT: s_mov_b32 s10, s20
-; GFX90A-NEXT: s_mov_b32 s11, s21
-; GFX90A-NEXT: s_mov_b32 s14, s12
-; GFX90A-NEXT: s_mov_b32 s15, s13
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_2_2:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v3i64__5_5_u_2() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_u_2:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:21]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s20
-; GFX900-NEXT: s_mov_b32 s9, s21
-; GFX900-NEXT: s_mov_b32 s10, s20
-; GFX900-NEXT: s_mov_b32 s11, s21
-; GFX900-NEXT: s_mov_b32 s14, s12
-; GFX900-NEXT: s_mov_b32 s15, s13
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_u_2:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:21]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s20
-; GFX90A-NEXT: s_mov_b32 s9, s21
-; GFX90A-NEXT: s_mov_b32 s10, s20
-; GFX90A-NEXT: s_mov_b32 s11, s21
-; GFX90A-NEXT: s_mov_b32 s14, s12
-; GFX90A-NEXT: s_mov_b32 s15, s13
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_u_2:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 2>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v3i64__5_5_0_2() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_0_2:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[20:25]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s24
-; GFX900-NEXT: s_mov_b32 s9, s25
-; GFX900-NEXT: s_mov_b32 s10, s24
-; GFX900-NEXT: s_mov_b32 s11, s25
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_0_2:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[20:25]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s24
-; GFX90A-NEXT: s_mov_b32 s9, s25
-; GFX90A-NEXT: s_mov_b32 s10, s24
-; GFX90A-NEXT: s_mov_b32 s11, s25
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_0_2:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 2>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v3i64__5_5_1_2() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_1_2:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_1_2:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_1_2:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 2>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v3i64__5_5_3_2() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_3_2:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[20:25]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s24
-; GFX900-NEXT: s_mov_b32 s9, s25
-; GFX900-NEXT: s_mov_b32 s10, s24
-; GFX900-NEXT: s_mov_b32 s11, s25
-; GFX900-NEXT: s_mov_b32 s12, s20
-; GFX900-NEXT: s_mov_b32 s13, s21
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_3_2:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[20:25]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s24
-; GFX90A-NEXT: s_mov_b32 s9, s25
-; GFX90A-NEXT: s_mov_b32 s10, s24
-; GFX90A-NEXT: s_mov_b32 s11, s25
-; GFX90A-NEXT: s_mov_b32 s12, s20
-; GFX90A-NEXT: s_mov_b32 s13, s21
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_3_2:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:17]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 2>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v3i64__5_5_4_2() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_4_2:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[20:25]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s24
-; GFX900-NEXT: s_mov_b32 s9, s25
-; GFX900-NEXT: s_mov_b32 s10, s24
-; GFX900-NEXT: s_mov_b32 s11, s25
-; GFX900-NEXT: s_mov_b32 s12, s22
-; GFX900-NEXT: s_mov_b32 s13, s23
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_4_2:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[20:25]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s24
-; GFX90A-NEXT: s_mov_b32 s9, s25
-; GFX90A-NEXT: s_mov_b32 s10, s24
-; GFX90A-NEXT: s_mov_b32 s11, s25
-; GFX90A-NEXT: s_mov_b32 s12, s22
-; GFX90A-NEXT: s_mov_b32 s13, s23
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_4_2:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:17]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 2>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v3i64__u_3_3_3() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__u_3_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 poison, i32 3, i32 3, i32 3>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v3i64__0_3_3_3() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__0_3_3_3:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__0_3_3_3:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__0_3_3_3:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 0, i32 3, i32 3, i32 3>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v3i64__1_3_3_3() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__1_3_3_3:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__1_3_3_3:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__1_3_3_3:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 1, i32 3, i32 3, i32 3>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v3i64__2_3_3_3() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__2_3_3_3:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__2_3_3_3:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__2_3_3_3:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 2, i32 3, i32 3, i32 3>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v3i64__3_3_3_3() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__3_3_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v3i64__4_3_3_3() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__4_3_3_3:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__4_3_3_3:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__4_3_3_3:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 4, i32 3, i32 3, i32 3>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v3i64__5_3_3_3() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_3_3_3:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_3_3_3:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_3_3_3:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 3, i32 3, i32 3>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v3i64__5_u_3_3() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_u_3_3:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_u_3_3:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_u_3_3:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 poison, i32 3, i32 3>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v3i64__5_0_3_3() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_0_3_3:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:21]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s20
-; GFX900-NEXT: s_mov_b32 s9, s21
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_0_3_3:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:21]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s20
-; GFX90A-NEXT: s_mov_b32 s9, s21
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_0_3_3:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:9]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 0, i32 3, i32 3>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v3i64__5_1_3_3() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_1_3_3:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_1_3_3:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_1_3_3:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 1, i32 3, i32 3>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v3i64__5_2_3_3() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_2_3_3:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_2_3_3:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_2_3_3:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[16:21]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s20
-; GFX942-NEXT: s_mov_b32 s9, s21
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s16
-; GFX942-NEXT: s_mov_b32 s13, s17
-; GFX942-NEXT: s_mov_b32 s14, s16
-; GFX942-NEXT: s_mov_b32 s15, s17
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+define void @s_shuffle_v4i64_v3i64__2_2_2_2() {
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__2_2_2_2:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s12, s8
+; GFX9-NEXT: s_mov_b32 s13, s9
+; GFX9-NEXT: s_mov_b32 s14, s8
+; GFX9-NEXT: s_mov_b32 s15, s9
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
- %vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 2, i32 3, i32 3>
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v3i64__5_4_3_3() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_4_3_3:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s6
-; GFX900-NEXT: s_mov_b32 s11, s7
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_4_3_3:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s6
-; GFX90A-NEXT: s_mov_b32 s11, s7
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_4_3_3:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+define void @s_shuffle_v4i64_v3i64__3_2_2_2() {
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__3_2_2_2:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: s_mov_b32 s14, s12
+; GFX9-NEXT: s_mov_b32 s15, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
- %vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 4, i32 3, i32 3>
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 3, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v3i64__5_5_3_3() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_3_3:
+define void @s_shuffle_v4i64_v3i64__4_2_2_2() {
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__4_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:21]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s20
-; GFX900-NEXT: s_mov_b32 s9, s21
-; GFX900-NEXT: s_mov_b32 s10, s20
-; GFX900-NEXT: s_mov_b32 s11, s21
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_3_3:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:21]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s20
-; GFX90A-NEXT: s_mov_b32 s9, s21
-; GFX90A-NEXT: s_mov_b32 s10, s20
-; GFX90A-NEXT: s_mov_b32 s11, s21
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_3_3:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 3>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v3i64__5_5_u_3() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_u_3:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: s_mov_b32 s14, s12
; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
@@ -12767,16 +9531,19 @@ define void @s_shuffle_v4i64_v3i64__5_5_u_3() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_u_3:
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__4_2_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
+; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: s_mov_b32 s14, s12
; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
@@ -12784,1637 +9551,1983 @@ define void @s_shuffle_v4i64_v3i64__5_5_u_3() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_u_3:
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__4_2_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b32 s8, s2
+; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b32 s10, s12
+; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b32 s14, s12
+; GFX942-NEXT: s_mov_b32 s15, s13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 3>
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 4, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v3i64__5_5_0_3() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_0_3:
+define void @s_shuffle_v4i64_v3i64__5_2_2_2() {
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:21]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s20
-; GFX900-NEXT: s_mov_b32 s9, s21
-; GFX900-NEXT: s_mov_b32 s10, s20
-; GFX900-NEXT: s_mov_b32 s11, s21
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_0_3:
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_2_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:21]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s20
-; GFX90A-NEXT: s_mov_b32 s9, s21
-; GFX90A-NEXT: s_mov_b32 s10, s20
-; GFX90A-NEXT: s_mov_b32 s11, s21
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_0_3:
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_2_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s10, s12
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[16:21]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s20
-; GFX942-NEXT: s_mov_b32 s9, s21
-; GFX942-NEXT: s_mov_b32 s10, s20
-; GFX942-NEXT: s_mov_b32 s11, s21
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s16
-; GFX942-NEXT: s_mov_b32 s15, s17
+; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b32 s14, s12
+; GFX942-NEXT: s_mov_b32 s15, s13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 3>
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v3i64__5_5_1_3() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_1_3:
+define void @s_shuffle_v4i64_v3i64__5_u_2_2() {
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_u_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:21]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s20
-; GFX900-NEXT: s_mov_b32 s9, s21
-; GFX900-NEXT: s_mov_b32 s10, s20
-; GFX900-NEXT: s_mov_b32 s11, s21
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_1_3:
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_u_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:21]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s20
-; GFX90A-NEXT: s_mov_b32 s9, s21
-; GFX90A-NEXT: s_mov_b32 s10, s20
-; GFX90A-NEXT: s_mov_b32 s11, s21
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_1_3:
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_u_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s14, s12
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[16:21]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s20
-; GFX942-NEXT: s_mov_b32 s9, s21
-; GFX942-NEXT: s_mov_b32 s10, s20
-; GFX942-NEXT: s_mov_b32 s11, s21
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s16
-; GFX942-NEXT: s_mov_b32 s15, s17
+; GFX942-NEXT: s_mov_b32 s15, s13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 3>
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 poison, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v3i64__5_5_2_3() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_2_3:
+define void @s_shuffle_v4i64_v3i64__5_0_2_2() {
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_0_2_2:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: s_mov_b32 s12, s16
+; GFX9-NEXT: s_mov_b32 s13, s17
+; GFX9-NEXT: s_mov_b32 s14, s16
+; GFX9-NEXT: s_mov_b32 s15, s17
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %vec1 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 0, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v3i64__5_1_2_2() {
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_1_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:21]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s20
-; GFX900-NEXT: s_mov_b32 s9, s21
-; GFX900-NEXT: s_mov_b32 s10, s20
-; GFX900-NEXT: s_mov_b32 s11, s21
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_2_3:
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_1_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:21]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s20
-; GFX90A-NEXT: s_mov_b32 s9, s21
-; GFX90A-NEXT: s_mov_b32 s10, s20
-; GFX90A-NEXT: s_mov_b32 s11, s21
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_2_3:
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_1_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s14, s12
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b32 s15, s13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 3>
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 1, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v3i64__5_5_4_3() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_4_3:
+define void @s_shuffle_v4i64_v3i64__5_3_2_2() {
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_3_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:21]
+; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s20
-; GFX900-NEXT: s_mov_b32 s9, s21
-; GFX900-NEXT: s_mov_b32 s10, s20
-; GFX900-NEXT: s_mov_b32 s11, s21
-; GFX900-NEXT: s_mov_b32 s12, s18
-; GFX900-NEXT: s_mov_b32 s13, s19
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_4_3:
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_3_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:21]
+; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s20
-; GFX90A-NEXT: s_mov_b32 s9, s21
-; GFX90A-NEXT: s_mov_b32 s10, s20
-; GFX90A-NEXT: s_mov_b32 s11, s21
-; GFX90A-NEXT: s_mov_b32 s12, s18
-; GFX90A-NEXT: s_mov_b32 s13, s19
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_4_3:
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_3_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s14, s12
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
; GFX942-NEXT: s_mov_b32 s10, s4
; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b32 s15, s13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 3>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v3i64__u_4_4_4() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__u_4_4_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 poison, i32 4, i32 4, i32 4>
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 3, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v3i64__0_4_4_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__0_4_4_4:
+define void @s_shuffle_v4i64_v3i64__5_4_2_2() {
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_4_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s10, s6
+; GFX900-NEXT: s_mov_b32 s11, s7
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__0_4_4_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_4_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s10, s6
+; GFX90A-NEXT: s_mov_b32 s11, s7
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__0_4_4_4:
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_4_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s14, s12
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b32 s10, s6
+; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b32 s15, s13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 4, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v3i64__1_4_4_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__1_4_4_4:
+define void @s_shuffle_v4i64_v3i64__5_5_2_2() {
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s12, s10
-; GFX900-NEXT: s_mov_b32 s13, s11
-; GFX900-NEXT: s_mov_b32 s14, s10
-; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__1_4_4_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s12, s10
-; GFX90A-NEXT: s_mov_b32 s13, s11
-; GFX90A-NEXT: s_mov_b32 s14, s10
-; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__1_4_4_4:
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s14, s12
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
+; GFX942-NEXT: s_mov_b32 s10, s8
+; GFX942-NEXT: s_mov_b32 s11, s9
+; GFX942-NEXT: s_mov_b32 s15, s13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 1, i32 4, i32 4, i32 4>
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v3i64__2_4_4_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__2_4_4_4:
+define void @s_shuffle_v4i64_v3i64__5_5_u_2() {
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_u_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
+; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s12, s10
-; GFX900-NEXT: s_mov_b32 s13, s11
-; GFX900-NEXT: s_mov_b32 s14, s10
-; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__2_4_4_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_u_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
+; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s12, s10
-; GFX90A-NEXT: s_mov_b32 s13, s11
-; GFX90A-NEXT: s_mov_b32 s14, s10
-; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__2_4_4_4:
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_u_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s14, s12
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
+; GFX942-NEXT: s_mov_b32 s10, s8
+; GFX942-NEXT: s_mov_b32 s11, s9
+; GFX942-NEXT: s_mov_b32 s15, s13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 2, i32 4, i32 4, i32 4>
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v3i64__3_4_4_4() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__3_4_4_4:
+define void @s_shuffle_v4i64_v3i64__5_5_0_2() {
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_5_0_2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
+; GFX9-NEXT: ; def s[12:17]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s14, s16
+; GFX9-NEXT: s_mov_b32 s15, s17
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:15]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 3, i32 4, i32 4, i32 4>
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v3i64__4_4_4_4() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__4_4_4_4:
+define void @s_shuffle_v4i64_v3i64__5_5_1_2() {
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_5_1_2:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s12, s14
+; GFX9-NEXT: s_mov_b32 s13, s15
+; GFX9-NEXT: s_mov_b32 s14, s16
+; GFX9-NEXT: s_mov_b32 s15, s17
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %vec1 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v3i64__5_5_3_2() {
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_5_3_2:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s12, s4
+; GFX9-NEXT: s_mov_b32 s13, s5
+; GFX9-NEXT: s_mov_b32 s14, s16
+; GFX9-NEXT: s_mov_b32 s15, s17
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %vec1 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v3i64__5_5_4_2() {
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_5_4_2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
+; GFX9-NEXT: ; def s[12:17]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s12, s6
+; GFX9-NEXT: s_mov_b32 s13, s7
+; GFX9-NEXT: s_mov_b32 s14, s16
+; GFX9-NEXT: s_mov_b32 s15, s17
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:15]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 4, i32 4, i32 4, i32 4>
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v3i64__5_4_4_4() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_4_4_4:
+define void @s_shuffle_v4i64_v3i64__u_3_3_3() {
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__u_3_3_3:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:15]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
- %vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 4, i32 4, i32 4>
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 poison, i32 3, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v3i64__5_u_4_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_u_4_4:
+define void @s_shuffle_v4i64_v3i64__0_3_3_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__0_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_u_4_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__0_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_u_4_4:
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__0_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
- %vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 poison, i32 4, i32 4>
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 0, i32 3, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v3i64__5_0_4_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_0_4_4:
+define void @s_shuffle_v4i64_v3i64__1_3_3_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__1_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_0_4_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__1_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_0_4_4:
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__1_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:9]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b32 s8, s2
+; GFX942-NEXT: s_mov_b32 s9, s3
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
- %vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 0, i32 4, i32 4>
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 1, i32 3, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v3i64__5_1_4_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_1_4_4:
+define void @s_shuffle_v4i64_v3i64__2_3_3_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__2_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_1_4_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__2_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_1_4_4:
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__2_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 2, i32 3, i32 3, i32 3>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v3i64__3_3_3_3() {
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__3_3_3_3:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v3i64__4_3_3_3() {
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__4_3_3_3:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s14
+; GFX9-NEXT: s_mov_b32 s9, s15
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: s_mov_b32 s14, s12
+; GFX9-NEXT: s_mov_b32 s15, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 1, i32 4, i32 4>
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 4, i32 3, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v3i64__5_2_4_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_2_4_4:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_2_4_4:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_2_4_4:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:17]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+define void @s_shuffle_v4i64_v3i64__5_3_3_3() {
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_3_3_3:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s4
+; GFX9-NEXT: s_mov_b32 s11, s5
+; GFX9-NEXT: s_mov_b32 s12, s4
+; GFX9-NEXT: s_mov_b32 s13, s5
+; GFX9-NEXT: s_mov_b32 s14, s4
+; GFX9-NEXT: s_mov_b32 s15, s5
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 2, i32 4, i32 4>
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 3, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v3i64__5_3_4_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_3_4_4:
+define void @s_shuffle_v4i64_v3i64__5_u_3_3() {
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_u_3_3:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s12, s4
+; GFX9-NEXT: s_mov_b32 s13, s5
+; GFX9-NEXT: s_mov_b32 s14, s4
+; GFX9-NEXT: s_mov_b32 s15, s5
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %vec1 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 poison, i32 3, i32 3>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v3i64__5_0_3_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_0_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:17]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s14, s4
+; GFX900-NEXT: s_mov_b32 s15, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_3_4_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_0_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:17]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s14, s4
+; GFX90A-NEXT: s_mov_b32 s15, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_3_4_4:
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_0_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:9]
+; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b32 s12, s4
+; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b32 s14, s4
+; GFX942-NEXT: s_mov_b32 s15, s5
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 3, i32 4, i32 4>
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 0, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v3i64__5_5_4_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_4_4:
+define void @s_shuffle_v4i64_v3i64__5_1_3_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_1_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
+; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s14, s4
+; GFX900-NEXT: s_mov_b32 s15, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_4_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_1_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
+; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s14, s4
+; GFX90A-NEXT: s_mov_b32 s15, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_4_4:
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_1_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:9]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s12, s4
+; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b32 s14, s4
+; GFX942-NEXT: s_mov_b32 s15, s5
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 4>
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 1, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v3i64__5_5_u_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_u_4:
+define void @s_shuffle_v4i64_v3i64__5_2_3_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_2_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
+; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s14, s4
+; GFX900-NEXT: s_mov_b32 s15, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_u_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_2_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
+; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s14, s4
+; GFX90A-NEXT: s_mov_b32 s15, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_u_4:
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_2_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b32 s10, s12
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:9]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b32 s12, s4
+; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b32 s14, s4
+; GFX942-NEXT: s_mov_b32 s15, s5
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 4>
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 2, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v3i64__5_5_0_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_0_4:
+define void @s_shuffle_v4i64_v3i64__5_4_3_3() {
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_4_3_3:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s6
+; GFX9-NEXT: s_mov_b32 s11, s7
+; GFX9-NEXT: s_mov_b32 s12, s4
+; GFX9-NEXT: s_mov_b32 s13, s5
+; GFX9-NEXT: s_mov_b32 s14, s4
+; GFX9-NEXT: s_mov_b32 s15, s5
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %vec1 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 4, i32 3, i32 3>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v3i64__5_5_3_3() {
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_5_3_3:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s12, s4
+; GFX9-NEXT: s_mov_b32 s13, s5
+; GFX9-NEXT: s_mov_b32 s14, s4
+; GFX9-NEXT: s_mov_b32 s15, s5
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %vec1 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 3>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v3i64__5_5_u_3() {
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_5_u_3:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s14, s4
+; GFX9-NEXT: s_mov_b32 s15, s5
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %vec1 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 3>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v3i64__5_5_0_3() {
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_5_0_3:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s14, s4
+; GFX9-NEXT: s_mov_b32 s15, s5
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %vec1 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 3>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v3i64__5_5_1_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_1_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s14, s4
+; GFX900-NEXT: s_mov_b32 s15, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_0_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_1_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s14, s4
+; GFX90A-NEXT: s_mov_b32 s15, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_0_4:
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_1_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:17]
+; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s12, s2
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b32 s10, s8
+; GFX942-NEXT: s_mov_b32 s11, s9
+; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b32 s14, s4
+; GFX942-NEXT: s_mov_b32 s15, s5
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 4>
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v3i64__5_5_1_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_1_4:
+define void @s_shuffle_v4i64_v3i64__5_5_2_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_2_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: s_mov_b32 s14, s4
+; GFX900-NEXT: s_mov_b32 s15, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_1_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_2_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: s_mov_b32 s14, s4
+; GFX90A-NEXT: s_mov_b32 s15, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_1_4:
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_2_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:17]
+; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b32 s10, s8
+; GFX942-NEXT: s_mov_b32 s11, s9
+; GFX942-NEXT: s_mov_b32 s14, s4
+; GFX942-NEXT: s_mov_b32 s15, s5
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 4>
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v3i64__5_5_2_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_2_4:
+define void @s_shuffle_v4i64_v3i64__5_5_4_3() {
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_5_4_3:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s12, s6
+; GFX9-NEXT: s_mov_b32 s13, s7
+; GFX9-NEXT: s_mov_b32 s14, s4
+; GFX9-NEXT: s_mov_b32 s15, s5
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %vec1 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 3>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v3i64__u_4_4_4() {
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__u_4_4_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: s_mov_b32 s14, s10
+; GFX9-NEXT: s_mov_b32 s15, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %vec1 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 poison, i32 4, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v3i64__0_4_4_4() {
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__0_4_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:21]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s20
-; GFX900-NEXT: s_mov_b32 s9, s21
-; GFX900-NEXT: s_mov_b32 s10, s20
-; GFX900-NEXT: s_mov_b32 s11, s21
-; GFX900-NEXT: s_mov_b32 s14, s18
-; GFX900-NEXT: s_mov_b32 s15, s19
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_2_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__0_4_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:21]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s20
-; GFX90A-NEXT: s_mov_b32 s9, s21
-; GFX90A-NEXT: s_mov_b32 s10, s20
-; GFX90A-NEXT: s_mov_b32 s11, s21
-; GFX90A-NEXT: s_mov_b32 s14, s18
-; GFX90A-NEXT: s_mov_b32 s15, s19
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_2_4:
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__0_4_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[12:17]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b32 s10, s14
+; GFX942-NEXT: s_mov_b32 s11, s15
+; GFX942-NEXT: s_mov_b32 s12, s14
+; GFX942-NEXT: s_mov_b32 s13, s15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 4>
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v3i64__5_5_3_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_3_4:
+define void @s_shuffle_v4i64_v3i64__1_4_4_4() {
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__1_4_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_3_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__1_4_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_3_4:
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__1_4_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b32 s8, s2
+; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b32 s12, s10
+; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b32 s14, s10
+; GFX942-NEXT: s_mov_b32 s15, s11
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 4>
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 1, i32 4, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v3i64__u_5_5_5() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__u_5_5_5:
+define void @s_shuffle_v4i64_v3i64__2_4_4_4() {
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__2_4_4_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s14
+; GFX9-NEXT: s_mov_b32 s11, s15
+; GFX9-NEXT: s_mov_b32 s12, s14
+; GFX9-NEXT: s_mov_b32 s13, s15
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %vec1 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 2, i32 4, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v3i64__3_4_4_4() {
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__3_4_4_4:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; def s[8:13]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: s_mov_b32 s14, s10
+; GFX9-NEXT: s_mov_b32 s15, s11
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:15]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 poison, i32 5, i32 5, i32 5>
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 3, i32 4, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v3i64__0_5_5_5() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__0_5_5_5:
+define void @s_shuffle_v4i64_v3i64__4_4_4_4() {
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__4_4_4_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: s_mov_b32 s14, s10
+; GFX9-NEXT: s_mov_b32 s15, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %vec1 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 4, i32 4, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v3i64__5_4_4_4() {
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_4_4_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s6
+; GFX9-NEXT: s_mov_b32 s11, s7
+; GFX9-NEXT: s_mov_b32 s12, s6
+; GFX9-NEXT: s_mov_b32 s13, s7
+; GFX9-NEXT: s_mov_b32 s14, s6
+; GFX9-NEXT: s_mov_b32 s15, s7
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %vec1 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 4, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v3i64__5_u_4_4() {
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_u_4_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s12, s6
+; GFX9-NEXT: s_mov_b32 s13, s7
+; GFX9-NEXT: s_mov_b32 s14, s6
+; GFX9-NEXT: s_mov_b32 s15, s7
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %vec1 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 poison, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v3i64__5_0_4_4() {
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_0_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s14, s6
+; GFX900-NEXT: s_mov_b32 s15, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__0_5_5_5:
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_0_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s14, s6
+; GFX90A-NEXT: s_mov_b32 s15, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__0_5_5_5:
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_0_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s10, s0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b32 s12, s6
+; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b32 s14, s6
+; GFX942-NEXT: s_mov_b32 s15, s7
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 0, i32 5, i32 5, i32 5>
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 0, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v3i64__1_5_5_5() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__1_5_5_5:
+define void @s_shuffle_v4i64_v3i64__5_1_4_4() {
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_1_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s14, s12
-; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s14, s6
+; GFX900-NEXT: s_mov_b32 s15, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__1_5_5_5:
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_1_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s14, s12
-; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s14, s6
+; GFX90A-NEXT: s_mov_b32 s15, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__1_5_5_5:
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_1_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b32 s12, s6
+; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b32 s14, s6
+; GFX942-NEXT: s_mov_b32 s15, s7
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 1, i32 5, i32 5, i32 5>
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 1, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v3i64__2_5_5_5() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__2_5_5_5:
+define void @s_shuffle_v4i64_v3i64__5_2_4_4() {
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_2_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
+; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
; GFX900-NEXT: s_mov_b32 s10, s12
; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s14, s12
-; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s14, s6
+; GFX900-NEXT: s_mov_b32 s15, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__2_5_5_5:
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_2_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
+; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
; GFX90A-NEXT: s_mov_b32 s10, s12
; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s14, s12
-; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s14, s6
+; GFX90A-NEXT: s_mov_b32 s15, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__2_5_5_5:
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_2_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s10, s12
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s12
; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b32 s12, s6
+; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b32 s14, s6
+; GFX942-NEXT: s_mov_b32 s15, s7
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 2, i32 5, i32 5, i32 5>
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 2, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v3i64__3_5_5_5() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__3_5_5_5:
+define void @s_shuffle_v4i64_v3i64__5_3_4_4() {
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_3_4_4:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
+; GFX9-NEXT: ; def s[4:9]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
+; GFX9-NEXT: s_mov_b32 s10, s4
+; GFX9-NEXT: s_mov_b32 s11, s5
+; GFX9-NEXT: s_mov_b32 s12, s6
+; GFX9-NEXT: s_mov_b32 s13, s7
+; GFX9-NEXT: s_mov_b32 s14, s6
+; GFX9-NEXT: s_mov_b32 s15, s7
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:15]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 3, i32 5, i32 5, i32 5>
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 3, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v3i64__4_5_5_5() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__4_5_5_5:
+define void @s_shuffle_v4i64_v3i64__5_5_4_4() {
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_5_4_4:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
+; GFX9-NEXT: ; def s[4:9]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s12, s6
+; GFX9-NEXT: s_mov_b32 s13, s7
+; GFX9-NEXT: s_mov_b32 s14, s6
+; GFX9-NEXT: s_mov_b32 s15, s7
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:15]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 4, i32 5, i32 5, i32 5>
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v3i64__5_u_5_5() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_u_5_5:
+define void @s_shuffle_v4i64_v3i64__5_5_u_4() {
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_5_u_4:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
+; GFX9-NEXT: ; def s[4:9]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s14, s6
+; GFX9-NEXT: s_mov_b32 s15, s7
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:15]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 poison, i32 5, i32 5>
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v3i64__5_0_5_5() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_0_5_5:
+define void @s_shuffle_v4i64_v3i64__5_5_0_4() {
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_5_0_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s14, s6
+; GFX9-NEXT: s_mov_b32 s15, s7
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %vec1 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v3i64__5_5_1_4() {
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_1_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s14, s12
-; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s14, s6
+; GFX900-NEXT: s_mov_b32 s15, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_0_5_5:
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_1_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:17]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s14, s6
+; GFX90A-NEXT: s_mov_b32 s15, s7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_1_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:9]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s10, s8
+; GFX942-NEXT: s_mov_b32 s11, s9
+; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b32 s14, s6
+; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %vec1 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v3i64__5_5_2_4() {
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_2_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: s_mov_b32 s14, s6
+; GFX900-NEXT: s_mov_b32 s15, s7
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_2_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s14, s12
-; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: s_mov_b32 s14, s6
+; GFX90A-NEXT: s_mov_b32 s15, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_0_5_5:
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_2_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b32 s10, s8
+; GFX942-NEXT: s_mov_b32 s11, s9
+; GFX942-NEXT: s_mov_b32 s14, s6
+; GFX942-NEXT: s_mov_b32 s15, s7
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 0, i32 5, i32 5>
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v3i64__5_1_5_5() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_1_5_5:
+define void @s_shuffle_v4i64_v3i64__5_5_3_4() {
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_5_3_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s12, s4
+; GFX9-NEXT: s_mov_b32 s13, s5
+; GFX9-NEXT: s_mov_b32 s14, s6
+; GFX9-NEXT: s_mov_b32 s15, s7
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %vec1 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v3i64__u_5_5_5() {
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__u_5_5_5:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: s_mov_b32 s14, s12
+; GFX9-NEXT: s_mov_b32 s15, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %vec1 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 poison, i32 5, i32 5, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v3i64__0_5_5_5() {
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__0_5_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -14423,8 +11536,8 @@ define void @s_shuffle_v4i64_v3i64__5_1_5_5() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
; GFX900-NEXT: s_mov_b32 s12, s16
; GFX900-NEXT: s_mov_b32 s13, s17
; GFX900-NEXT: s_mov_b32 s14, s16
@@ -14434,7 +11547,7 @@ define void @s_shuffle_v4i64_v3i64__5_1_5_5() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_1_5_5:
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__0_5_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -14443,8 +11556,8 @@ define void @s_shuffle_v4i64_v3i64__5_1_5_5() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
; GFX90A-NEXT: s_mov_b32 s12, s16
; GFX90A-NEXT: s_mov_b32 s13, s17
; GFX90A-NEXT: s_mov_b32 s14, s16
@@ -14454,7 +11567,7 @@ define void @s_shuffle_v4i64_v3i64__5_1_5_5() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_1_5_5:
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__0_5_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -14463,8 +11576,8 @@ define void @s_shuffle_v4i64_v3i64__5_1_5_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b32 s10, s4
+; GFX942-NEXT: s_mov_b32 s11, s5
; GFX942-NEXT: s_mov_b32 s12, s4
; GFX942-NEXT: s_mov_b32 s13, s5
; GFX942-NEXT: s_mov_b32 s14, s4
@@ -14475,25 +11588,25 @@ define void @s_shuffle_v4i64_v3i64__5_1_5_5() {
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 1, i32 5, i32 5>
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 0, i32 5, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v3i64__5_2_5_5() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_2_5_5:
+define void @s_shuffle_v4i64_v3i64__1_5_5_5() {
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__1_5_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: s_mov_b32 s14, s12
; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
@@ -14501,19 +11614,19 @@ define void @s_shuffle_v4i64_v3i64__5_2_5_5() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_2_5_5:
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__1_5_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: s_mov_b32 s14, s12
; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
@@ -14521,7 +11634,7 @@ define void @s_shuffle_v4i64_v3i64__5_2_5_5() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_2_5_5:
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__1_5_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -14530,10 +11643,10 @@ define void @s_shuffle_v4i64_v3i64__5_2_5_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b32 s8, s2
+; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b32 s10, s12
+; GFX942-NEXT: s_mov_b32 s11, s13
; GFX942-NEXT: s_mov_b32 s14, s12
; GFX942-NEXT: s_mov_b32 s15, s13
; GFX942-NEXT: ;;#ASMSTART
@@ -14542,22 +11655,23 @@ define void @s_shuffle_v4i64_v3i64__5_2_5_5() {
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 2, i32 5, i32 5>
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 1, i32 5, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v3i64__5_3_5_5() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_3_5_5:
+define void @s_shuffle_v4i64_v3i64__2_5_5_5() {
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__2_5_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
; GFX900-NEXT: s_mov_b32 s12, s16
; GFX900-NEXT: s_mov_b32 s13, s17
; GFX900-NEXT: s_mov_b32 s14, s16
@@ -14567,16 +11681,17 @@ define void @s_shuffle_v4i64_v3i64__5_3_5_5() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_3_5_5:
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__2_5_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
; GFX90A-NEXT: s_mov_b32 s12, s16
; GFX90A-NEXT: s_mov_b32 s13, s17
; GFX90A-NEXT: s_mov_b32 s14, s16
@@ -14586,16 +11701,18 @@ define void @s_shuffle_v4i64_v3i64__5_3_5_5() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_3_5_5:
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__2_5_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:9]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b32 s10, s4
+; GFX942-NEXT: s_mov_b32 s11, s5
; GFX942-NEXT: s_mov_b32 s12, s4
; GFX942-NEXT: s_mov_b32 s13, s5
; GFX942-NEXT: s_mov_b32 s14, s4
@@ -14606,20 +11723,20 @@ define void @s_shuffle_v4i64_v3i64__5_3_5_5() {
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 3, i32 5, i32 5>
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 2, i32 5, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v3i64__5_4_5_5() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_4_5_5:
+define void @s_shuffle_v4i64_v3i64__3_5_5_5() {
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__3_5_5_5:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; def s[8:13]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
; GFX9-NEXT: s_mov_b32 s14, s12
; GFX9-NEXT: s_mov_b32 s15, s13
; GFX9-NEXT: ;;#ASMSTART
@@ -14628,399 +11745,518 @@ define void @s_shuffle_v4i64_v3i64__5_4_5_5() {
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 4, i32 5, i32 5>
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 3, i32 5, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v3i64__5_5_u_5() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_u_5:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s14, s12
-; GFX900-NEXT: s_mov_b32 s15, s13
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_u_5:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s14, s12
-; GFX90A-NEXT: s_mov_b32 s15, s13
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_u_5:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+define void @s_shuffle_v4i64_v3i64__4_5_5_5() {
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__4_5_5_5:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: s_mov_b32 s14, s12
+; GFX9-NEXT: s_mov_b32 s15, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 5>
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 4, i32 5, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v3i64__5_5_0_5() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_0_5:
+define void @s_shuffle_v4i64_v3i64__5_u_5_5() {
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_u_5_5:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s12, s8
+; GFX9-NEXT: s_mov_b32 s13, s9
+; GFX9-NEXT: s_mov_b32 s14, s8
+; GFX9-NEXT: s_mov_b32 s15, s9
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %vec1 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 poison, i32 5, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v3i64__5_0_5_5() {
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_0_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s8
+; GFX900-NEXT: s_mov_b32 s13, s9
+; GFX900-NEXT: s_mov_b32 s14, s8
+; GFX900-NEXT: s_mov_b32 s15, s9
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_0_5:
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_0_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s8
+; GFX90A-NEXT: s_mov_b32 s13, s9
+; GFX90A-NEXT: s_mov_b32 s14, s8
+; GFX90A-NEXT: s_mov_b32 s15, s9
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_0_5:
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_0_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:17]
+; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s10, s0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s16
-; GFX942-NEXT: s_mov_b32 s15, s17
+; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b32 s12, s8
+; GFX942-NEXT: s_mov_b32 s13, s9
+; GFX942-NEXT: s_mov_b32 s14, s8
+; GFX942-NEXT: s_mov_b32 s15, s9
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 5>
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 0, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v3i64__5_5_1_5() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_1_5:
+define void @s_shuffle_v4i64_v3i64__5_1_5_5() {
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_1_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s12, s8
+; GFX900-NEXT: s_mov_b32 s13, s9
+; GFX900-NEXT: s_mov_b32 s14, s8
+; GFX900-NEXT: s_mov_b32 s15, s9
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_1_5:
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_1_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s12, s8
+; GFX90A-NEXT: s_mov_b32 s13, s9
+; GFX90A-NEXT: s_mov_b32 s14, s8
+; GFX90A-NEXT: s_mov_b32 s15, s9
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_1_5:
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_1_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:17]
+; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s16
-; GFX942-NEXT: s_mov_b32 s15, s17
+; GFX942-NEXT: s_mov_b32 s12, s8
+; GFX942-NEXT: s_mov_b32 s13, s9
+; GFX942-NEXT: s_mov_b32 s14, s8
+; GFX942-NEXT: s_mov_b32 s15, s9
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 5>
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 1, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v3i64__5_5_2_5() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_2_5:
+define void @s_shuffle_v4i64_v3i64__5_2_5_5() {
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_2_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:21]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s20
-; GFX900-NEXT: s_mov_b32 s9, s21
-; GFX900-NEXT: s_mov_b32 s10, s20
-; GFX900-NEXT: s_mov_b32 s11, s21
-; GFX900-NEXT: s_mov_b32 s14, s20
-; GFX900-NEXT: s_mov_b32 s15, s21
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s8
+; GFX900-NEXT: s_mov_b32 s13, s9
+; GFX900-NEXT: s_mov_b32 s14, s8
+; GFX900-NEXT: s_mov_b32 s15, s9
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_2_5:
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_2_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:21]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s20
-; GFX90A-NEXT: s_mov_b32 s9, s21
-; GFX90A-NEXT: s_mov_b32 s10, s20
-; GFX90A-NEXT: s_mov_b32 s11, s21
-; GFX90A-NEXT: s_mov_b32 s14, s20
-; GFX90A-NEXT: s_mov_b32 s15, s21
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s8
+; GFX90A-NEXT: s_mov_b32 s13, s9
+; GFX90A-NEXT: s_mov_b32 s14, s8
+; GFX90A-NEXT: s_mov_b32 s15, s9
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_2_5:
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_2_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s10, s12
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b32 s12, s8
+; GFX942-NEXT: s_mov_b32 s13, s9
+; GFX942-NEXT: s_mov_b32 s14, s8
+; GFX942-NEXT: s_mov_b32 s15, s9
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 5>
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 2, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v3i64__5_5_3_5() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_3_5:
+define void @s_shuffle_v4i64_v3i64__5_3_5_5() {
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_3_5_5:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s4
+; GFX9-NEXT: s_mov_b32 s11, s5
+; GFX9-NEXT: s_mov_b32 s12, s8
+; GFX9-NEXT: s_mov_b32 s13, s9
+; GFX9-NEXT: s_mov_b32 s14, s8
+; GFX9-NEXT: s_mov_b32 s15, s9
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %vec1 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 3, i32 5, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v3i64__5_4_5_5() {
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_4_5_5:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s6
+; GFX9-NEXT: s_mov_b32 s11, s7
+; GFX9-NEXT: s_mov_b32 s12, s8
+; GFX9-NEXT: s_mov_b32 s13, s9
+; GFX9-NEXT: s_mov_b32 s14, s8
+; GFX9-NEXT: s_mov_b32 s15, s9
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %vec1 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 4, i32 5, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v3i64__5_5_u_5() {
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_5_u_5:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s14, s8
+; GFX9-NEXT: s_mov_b32 s15, s9
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %vec1 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v3i64__5_5_0_5() {
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_5_0_5:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s14, s8
+; GFX9-NEXT: s_mov_b32 s15, s9
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %vec1 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v3i64__5_5_1_5() {
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_1_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s14, s8
+; GFX900-NEXT: s_mov_b32 s15, s9
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_3_5:
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_1_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s14, s8
+; GFX90A-NEXT: s_mov_b32 s15, s9
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_3_5:
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_1_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:9]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s10, s8
+; GFX942-NEXT: s_mov_b32 s11, s9
+; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b32 s14, s8
+; GFX942-NEXT: s_mov_b32 s15, s9
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 5>
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v3i64__5_5_4_5() {
-; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_4_5:
+define void @s_shuffle_v4i64_v3i64__5_5_2_5() {
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_2_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
+; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: s_mov_b32 s14, s8
+; GFX900-NEXT: s_mov_b32 s15, s9
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_4_5:
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_2_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
+; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: s_mov_b32 s14, s8
+; GFX90A-NEXT: s_mov_b32 s15, s9
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_4_5:
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_2_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:9]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s10, s8
+; GFX942-NEXT: s_mov_b32 s11, s9
+; GFX942-NEXT: s_mov_b32 s14, s8
+; GFX942-NEXT: s_mov_b32 s15, s9
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %vec1 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v3i64__5_5_3_5() {
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_5_3_5:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s12, s4
+; GFX9-NEXT: s_mov_b32 s13, s5
+; GFX9-NEXT: s_mov_b32 s14, s8
+; GFX9-NEXT: s_mov_b32 s15, s9
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %vec1 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v3i64__5_5_4_5() {
+; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_5_4_5:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s12, s6
+; GFX9-NEXT: s_mov_b32 s13, s7
+; GFX9-NEXT: s_mov_b32 s14, s8
+; GFX9-NEXT: s_mov_b32 s15, s9
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 5>
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v4i64.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v4i64.ll
index 32f6e00716e37..519c90672016d 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v4i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v4i64.ll
@@ -139,39 +139,33 @@ define void @v_shuffle_v4i64_v4i64__3_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v4i64__3_u_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v4i64__3_u_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v4i64__3_u_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -275,39 +269,33 @@ define void @v_shuffle_v4i64_v4i64__7_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v4i64__7_u_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v4i64__7_u_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_u_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -321,55 +309,42 @@ define void @v_shuffle_v4i64_v4i64__7_0_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v4i64__7_0_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v8, v6
-; GFX900-NEXT: v_mov_b32_e32 v9, v7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, v0
-; GFX900-NEXT: v_mov_b32_e32 v11, v1
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v4i64__7_0_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:9]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v8
-; GFX90A-NEXT: v_mov_b32_e32 v3, v9
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_0_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:9]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v8
-; GFX942-NEXT: v_mov_b32_e32 v3, v9
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -383,49 +358,43 @@ define void @v_shuffle_v4i64_v4i64__7_1_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v4i64__7_1_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v10
-; GFX900-NEXT: v_mov_b32_e32 v1, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v4i64__7_1_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v10
-; GFX90A-NEXT: v_mov_b32_e32 v1, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_1_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v10
-; GFX942-NEXT: v_mov_b32_e32 v1, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -439,49 +408,43 @@ define void @v_shuffle_v4i64_v4i64__7_2_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v4i64__7_2_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, v12
-; GFX900-NEXT: v_mov_b32_e32 v3, v13
-; GFX900-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v4i64__7_2_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, v12
-; GFX90A-NEXT: v_mov_b32_e32 v3, v13
-; GFX90A-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_2_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v12
-; GFX942-NEXT: v_mov_b32_e32 v3, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -495,49 +458,43 @@ define void @v_shuffle_v4i64_v4i64__7_3_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v4i64__7_3_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v14
-; GFX900-NEXT: v_mov_b32_e32 v5, v15
-; GFX900-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v4i64__7_3_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v14
-; GFX90A-NEXT: v_mov_b32_e32 v5, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_3_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v14
-; GFX942-NEXT: v_mov_b32_e32 v5, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -551,45 +508,40 @@ define void @v_shuffle_v4i64_v4i64__7_4_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v4i64__7_4_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v0
+; GFX900-NEXT: v_mov_b32_e32 v9, v1
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v4i64__7_4_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v0
+; GFX90A-NEXT: v_mov_b32_e32 v9, v1
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_4_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v0
+; GFX942-NEXT: v_mov_b32_e32 v9, v1
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -603,39 +555,40 @@ define void @v_shuffle_v4i64_v4i64__7_5_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v4i64__7_5_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v2
+; GFX900-NEXT: v_mov_b32_e32 v9, v3
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v4i64__7_5_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v2
+; GFX90A-NEXT: v_mov_b32_e32 v9, v3
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_5_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v2
+; GFX942-NEXT: v_mov_b32_e32 v9, v3
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -649,39 +602,40 @@ define void @v_shuffle_v4i64_v4i64__7_6_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v4i64__7_6_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v4
+; GFX900-NEXT: v_mov_b32_e32 v9, v5
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v4i64__7_6_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v4
+; GFX90A-NEXT: v_mov_b32_e32 v9, v5
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_6_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v4
+; GFX942-NEXT: v_mov_b32_e32 v9, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -695,39 +649,40 @@ define void @v_shuffle_v4i64_v4i64__7_7_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v4i64__7_7_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v4i64__7_7_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -741,51 +696,54 @@ define void @v_shuffle_v4i64_v4i64__7_7_0_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v4i64__7_7_0_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:9]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v8
-; GFX900-NEXT: v_mov_b32_e32 v7, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v4i64__7_7_0_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:9]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v8
-; GFX90A-NEXT: v_mov_b32_e32 v7, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_0_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:9]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v6, v8
-; GFX942-NEXT: v_mov_b32_e32 v7, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -799,51 +757,54 @@ define void @v_shuffle_v4i64_v4i64__7_7_1_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v4i64__7_7_1_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v8, v10
-; GFX900-NEXT: v_mov_b32_e32 v9, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v4i64__7_7_1_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v10
-; GFX90A-NEXT: v_mov_b32_e32 v9, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_1_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v8, v10
-; GFX942-NEXT: v_mov_b32_e32 v9, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -857,51 +818,54 @@ define void @v_shuffle_v4i64_v4i64__7_7_2_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v4i64__7_7_2_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
-; GFX900-NEXT: v_mov_b32_e32 v10, v12
-; GFX900-NEXT: v_mov_b32_e32 v11, v13
-; GFX900-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v4i64__7_7_2_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
-; GFX90A-NEXT: v_mov_b32_e32 v10, v12
-; GFX90A-NEXT: v_mov_b32_e32 v11, v13
-; GFX90A-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_2_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v10, v12
-; GFX942-NEXT: v_mov_b32_e32 v11, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1]
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -915,57 +879,52 @@ define void @v_shuffle_v4i64_v4i64__7_7_3_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v4i64__7_7_3_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, v6
-; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v10, v[8:11], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v4i64__7_7_3_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v18, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v16, v14
+; GFX90A-NEXT: v_mov_b32_e32 v17, v15
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: v_mov_b32_e32 v12, v14
-; GFX90A-NEXT: v_mov_b32_e32 v13, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v18, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v18, v[14:17], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_3_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v18, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: v_mov_b32_e32 v12, v14
-; GFX942-NEXT: v_mov_b32_e32 v13, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v18, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v16, v14
+; GFX942-NEXT: v_mov_b32_e32 v17, v15
+; GFX942-NEXT: global_store_dwordx4 v18, v[14:17], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -979,42 +938,42 @@ define void @v_shuffle_v4i64_v4i64__7_7_4_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v4i64__7_7_4_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v4i64__7_7_4_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_4_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -1028,45 +987,42 @@ define void @v_shuffle_v4i64_v4i64__7_7_5_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v4i64__7_7_5_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v4i64__7_7_5_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_5_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -1080,45 +1036,42 @@ define void @v_shuffle_v4i64_v4i64__7_7_6_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v4i64__7_7_6_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v4i64__7_7_6_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_6_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -1132,48 +1085,45 @@ define void @v_shuffle_v4i64_v4i64__7_7_7_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v4i64__7_7_7_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v4i64__7_7_7_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_7_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -1187,64 +1137,54 @@ define void @v_shuffle_v4i64_v4i64__7_7_7_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v4i64__7_7_7_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, v0
-; GFX900-NEXT: v_mov_b32_e32 v11, v1
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
; GFX900-NEXT: v_mov_b32_e32 v8, v6
; GFX900-NEXT: v_mov_b32_e32 v9, v7
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v4i64__7_7_7_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:9]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v8
-; GFX90A-NEXT: v_mov_b32_e32 v3, v9
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: v_mov_b32_e32 v6, v8
-; GFX90A-NEXT: v_mov_b32_e32 v7, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_7_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:9]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v8
-; GFX942-NEXT: v_mov_b32_e32 v3, v9
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: v_mov_b32_e32 v6, v8
-; GFX942-NEXT: v_mov_b32_e32 v7, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -1258,58 +1198,55 @@ define void @v_shuffle_v4i64_v4i64__7_7_7_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v4i64__7_7_7_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v10
-; GFX900-NEXT: v_mov_b32_e32 v1, v11
-; GFX900-NEXT: v_mov_b32_e32 v8, v10
-; GFX900-NEXT: v_mov_b32_e32 v9, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v4i64__7_7_7_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v10
-; GFX90A-NEXT: v_mov_b32_e32 v1, v11
-; GFX90A-NEXT: v_mov_b32_e32 v8, v10
-; GFX90A-NEXT: v_mov_b32_e32 v9, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_7_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v10
-; GFX942-NEXT: v_mov_b32_e32 v1, v11
-; GFX942-NEXT: v_mov_b32_e32 v8, v10
-; GFX942-NEXT: v_mov_b32_e32 v9, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -1323,58 +1260,55 @@ define void @v_shuffle_v4i64_v4i64__7_7_7_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v4i64__7_7_7_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v12
-; GFX900-NEXT: v_mov_b32_e32 v3, v13
-; GFX900-NEXT: v_mov_b32_e32 v10, v12
-; GFX900-NEXT: v_mov_b32_e32 v11, v13
-; GFX900-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v4i64__7_7_7_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v12
-; GFX90A-NEXT: v_mov_b32_e32 v3, v13
-; GFX90A-NEXT: v_mov_b32_e32 v10, v12
-; GFX90A-NEXT: v_mov_b32_e32 v11, v13
-; GFX90A-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_7_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v12
-; GFX942-NEXT: v_mov_b32_e32 v3, v13
-; GFX942-NEXT: v_mov_b32_e32 v10, v12
-; GFX942-NEXT: v_mov_b32_e32 v11, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -1388,57 +1322,55 @@ define void @v_shuffle_v4i64_v4i64__7_7_7_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v4i64__7_7_7_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v14
-; GFX900-NEXT: v_mov_b32_e32 v5, v15
-; GFX900-NEXT: v_mov_b32_e32 v12, v14
-; GFX900-NEXT: v_mov_b32_e32 v13, v15
-; GFX900-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v4i64__7_7_7_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v14
-; GFX90A-NEXT: v_mov_b32_e32 v5, v15
-; GFX90A-NEXT: v_mov_b32_e32 v12, v14
-; GFX90A-NEXT: v_mov_b32_e32 v13, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_7_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v14
-; GFX942-NEXT: v_mov_b32_e32 v5, v15
-; GFX942-NEXT: v_mov_b32_e32 v12, v14
-; GFX942-NEXT: v_mov_b32_e32 v13, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -1452,57 +1384,52 @@ define void @v_shuffle_v4i64_v4i64__7_7_7_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v4i64__7_7_7_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v8, v0
+; GFX900-NEXT: v_mov_b32_e32 v9, v1
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v4i64__7_7_7_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v8, v0
+; GFX90A-NEXT: v_mov_b32_e32 v9, v1
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_7_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v0
+; GFX942-NEXT: v_mov_b32_e32 v9, v1
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -1516,48 +1443,52 @@ define void @v_shuffle_v4i64_v4i64__7_7_7_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v4i64__7_7_7_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
-; GFX900-NEXT: s_waitcnt vmcnt(0)
-; GFX900-NEXT: s_setpc_b64 s[30:31]
+; GFX900-NEXT: v_mov_b32_e32 v8, v2
+; GFX900-NEXT: v_mov_b32_e32 v9, v3
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v4i64__7_7_7_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v2
+; GFX90A-NEXT: v_mov_b32_e32 v9, v3
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_7_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v2
+; GFX942-NEXT: v_mov_b32_e32 v9, v3
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -1571,51 +1502,52 @@ define void @v_shuffle_v4i64_v4i64__7_7_7_6(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v4i64__7_7_7_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v8, v4
+; GFX900-NEXT: v_mov_b32_e32 v9, v5
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v4i64__7_7_7_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v8, v4
+; GFX90A-NEXT: v_mov_b32_e32 v9, v5
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_7_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v4
+; GFX942-NEXT: v_mov_b32_e32 v9, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -1629,42 +1561,43 @@ define void @v_shuffle_v4i64_v4i64__7_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v4i64__7_7_7_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v4i64__7_7_7_7:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_7_7:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -1679,13 +1612,13 @@ define void @v_shuffle_v4i64_v4i64__u_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v0, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1693,13 +1626,13 @@ define void @v_shuffle_v4i64_v4i64__u_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v0, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1707,13 +1640,13 @@ define void @v_shuffle_v4i64_v4i64__u_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v0, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v0, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -1775,15 +1708,15 @@ define void @v_shuffle_v4i64_v4i64__1_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: v_mov_b32_e32 v6, v0
-; GFX900-NEXT: v_mov_b32_e32 v7, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
+; GFX900-NEXT: v_mov_b32_e32 v0, v2
+; GFX900-NEXT: v_mov_b32_e32 v1, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: v_mov_b32_e32 v7, v3
+; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1791,15 +1724,15 @@ define void @v_shuffle_v4i64_v4i64__1_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: v_mov_b32_e32 v7, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v2
+; GFX90A-NEXT: v_mov_b32_e32 v1, v3
+; GFX90A-NEXT: v_mov_b32_e32 v6, v2
+; GFX90A-NEXT: v_mov_b32_e32 v7, v3
+; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1807,15 +1740,15 @@ define void @v_shuffle_v4i64_v4i64__1_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v2
+; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: v_mov_b32_e32 v6, v2
+; GFX942-NEXT: v_mov_b32_e32 v7, v3
+; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -1885,15 +1818,13 @@ define void @v_shuffle_v4i64_v4i64__3_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v0
; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v0
+; GFX900-NEXT: v_mov_b32_e32 v9, v1
+; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1903,15 +1834,13 @@ define void @v_shuffle_v4i64_v4i64__3_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v0
; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v0
+; GFX90A-NEXT: v_mov_b32_e32 v9, v1
+; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1921,15 +1850,13 @@ define void @v_shuffle_v4i64_v4i64__3_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v0
; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v0
+; GFX942-NEXT: v_mov_b32_e32 v9, v1
+; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -1943,13 +1870,13 @@ define void @v_shuffle_v4i64_v4i64__4_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v0, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1957,13 +1884,13 @@ define void @v_shuffle_v4i64_v4i64__4_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v0, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1971,13 +1898,13 @@ define void @v_shuffle_v4i64_v4i64__4_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v0, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v0, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -2121,20 +2048,16 @@ define void @v_shuffle_v4i64_v4i64__7_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, v6
-; GFX900-NEXT: v_mov_b32_e32 v9, v7
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: v_mov_b32_e32 v10, v0
-; GFX900-NEXT: v_mov_b32_e32 v11, v1
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v10, v8
+; GFX900-NEXT: v_mov_b32_e32 v11, v9
+; GFX900-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2142,20 +2065,16 @@ define void @v_shuffle_v4i64_v4i64__7_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:9]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v8
-; GFX90A-NEXT: v_mov_b32_e32 v3, v9
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v10, v8
+; GFX90A-NEXT: v_mov_b32_e32 v11, v9
+; GFX90A-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2163,21 +2082,16 @@ define void @v_shuffle_v4i64_v4i64__7_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:9]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: v_mov_b32_e32 v2, v8
-; GFX942-NEXT: v_mov_b32_e32 v3, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v10, v8
+; GFX942-NEXT: v_mov_b32_e32 v11, v9
+; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -2192,19 +2106,16 @@ define void @v_shuffle_v4i64_v4i64__7_u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:9]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v8
-; GFX900-NEXT: v_mov_b32_e32 v1, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v10, v8
+; GFX900-NEXT: v_mov_b32_e32 v11, v9
+; GFX900-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2212,19 +2123,16 @@ define void @v_shuffle_v4i64_v4i64__7_u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:9]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v8
-; GFX90A-NEXT: v_mov_b32_e32 v1, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v10, v8
+; GFX90A-NEXT: v_mov_b32_e32 v11, v9
+; GFX90A-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2232,20 +2140,16 @@ define void @v_shuffle_v4i64_v4i64__7_u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:9]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v0, v8
-; GFX942-NEXT: v_mov_b32_e32 v1, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v10, v8
+; GFX942-NEXT: v_mov_b32_e32 v11, v9
+; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -2260,64 +2164,58 @@ define void @v_shuffle_v4i64_v4i64__7_1_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: v_mov_b32_e32 v6, v0
-; GFX900-NEXT: v_mov_b32_e32 v7, v1
-; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
+; GFX900-NEXT: v_mov_b32_e32 v0, v2
+; GFX900-NEXT: v_mov_b32_e32 v1, v3
+; GFX900-NEXT: v_mov_b32_e32 v8, v4
+; GFX900-NEXT: v_mov_b32_e32 v9, v5
+; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
; GFX900-NEXT: s_nop 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v10
-; GFX900-NEXT: v_mov_b32_e32 v1, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v4i64__7_1_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v0, v2
+; GFX90A-NEXT: v_mov_b32_e32 v1, v3
+; GFX90A-NEXT: v_mov_b32_e32 v14, v4
+; GFX90A-NEXT: v_mov_b32_e32 v15, v5
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: v_mov_b32_e32 v7, v1
-; GFX90A-NEXT: v_mov_b32_e32 v0, v10
-; GFX90A-NEXT: v_mov_b32_e32 v1, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_1_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v2
+; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: v_mov_b32_e32 v14, v4
+; GFX942-NEXT: v_mov_b32_e32 v15, v5
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
-; GFX942-NEXT: v_mov_b32_e32 v0, v10
-; GFX942-NEXT: v_mov_b32_e32 v1, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -2332,19 +2230,19 @@ define void @v_shuffle_v4i64_v4i64__7_2_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, v8
+; GFX900-NEXT: v_mov_b32_e32 v11, v9
+; GFX900-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v14, v[0:3], s[16:17] offset:16
; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v12
-; GFX900-NEXT: v_mov_b32_e32 v3, v13
-; GFX900-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v12
+; GFX900-NEXT: v_mov_b32_e32 v9, v13
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2352,19 +2250,19 @@ define void @v_shuffle_v4i64_v4i64__7_2_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, v8
+; GFX90A-NEXT: v_mov_b32_e32 v11, v9
+; GFX90A-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v14, v[0:3], s[16:17] offset:16
; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v12
-; GFX90A-NEXT: v_mov_b32_e32 v3, v13
-; GFX90A-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v12
+; GFX90A-NEXT: v_mov_b32_e32 v9, v13
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2372,19 +2270,19 @@ define void @v_shuffle_v4i64_v4i64__7_2_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, v8
+; GFX942-NEXT: v_mov_b32_e32 v11, v9
+; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v14, v[0:3], s[0:1] offset:16
; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v12
-; GFX942-NEXT: v_mov_b32_e32 v3, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v12
+; GFX942-NEXT: v_mov_b32_e32 v9, v13
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -2399,18 +2297,19 @@ define void @v_shuffle_v4i64_v4i64__7_3_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, v8
+; GFX900-NEXT: v_mov_b32_e32 v11, v9
+; GFX900-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: v_mov_b32_e32 v4, v14
-; GFX900-NEXT: v_mov_b32_e32 v5, v15
-; GFX900-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v14
+; GFX900-NEXT: v_mov_b32_e32 v9, v15
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2418,18 +2317,19 @@ define void @v_shuffle_v4i64_v4i64__7_3_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v16, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, v8
+; GFX90A-NEXT: v_mov_b32_e32 v11, v9
+; GFX90A-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: v_mov_b32_e32 v4, v14
-; GFX90A-NEXT: v_mov_b32_e32 v5, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v14
+; GFX90A-NEXT: v_mov_b32_e32 v9, v15
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2437,18 +2337,19 @@ define void @v_shuffle_v4i64_v4i64__7_3_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, v8
+; GFX942-NEXT: v_mov_b32_e32 v11, v9
+; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: v_mov_b32_e32 v4, v14
-; GFX942-NEXT: v_mov_b32_e32 v5, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v14
+; GFX942-NEXT: v_mov_b32_e32 v9, v15
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -2463,21 +2364,19 @@ define void @v_shuffle_v4i64_v4i64__7_4_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, v8
+; GFX900-NEXT: v_mov_b32_e32 v11, v9
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v10
-; GFX900-NEXT: v_mov_b32_e32 v1, v11
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v0
+; GFX900-NEXT: v_mov_b32_e32 v9, v1
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2485,21 +2384,19 @@ define void @v_shuffle_v4i64_v4i64__7_4_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, v8
+; GFX90A-NEXT: v_mov_b32_e32 v11, v9
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v10
-; GFX90A-NEXT: v_mov_b32_e32 v1, v11
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v0
+; GFX90A-NEXT: v_mov_b32_e32 v9, v1
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2507,21 +2404,19 @@ define void @v_shuffle_v4i64_v4i64__7_4_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, v8
+; GFX942-NEXT: v_mov_b32_e32 v11, v9
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:16
; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v0, v10
-; GFX942-NEXT: v_mov_b32_e32 v1, v11
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v0
+; GFX942-NEXT: v_mov_b32_e32 v9, v1
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -2536,19 +2431,19 @@ define void @v_shuffle_v4i64_v4i64__7_5_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, v8
+; GFX900-NEXT: v_mov_b32_e32 v11, v9
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:9]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v8
-; GFX900-NEXT: v_mov_b32_e32 v3, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v2
+; GFX900-NEXT: v_mov_b32_e32 v9, v3
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2556,19 +2451,19 @@ define void @v_shuffle_v4i64_v4i64__7_5_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, v8
+; GFX90A-NEXT: v_mov_b32_e32 v11, v9
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:9]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v8
-; GFX90A-NEXT: v_mov_b32_e32 v3, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v2
+; GFX90A-NEXT: v_mov_b32_e32 v9, v3
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2576,20 +2471,19 @@ define void @v_shuffle_v4i64_v4i64__7_5_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, v8
+; GFX942-NEXT: v_mov_b32_e32 v11, v9
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:9]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:16
; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v8
-; GFX942-NEXT: v_mov_b32_e32 v3, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v2
+; GFX942-NEXT: v_mov_b32_e32 v9, v3
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -2604,18 +2498,19 @@ define void @v_shuffle_v4i64_v4i64__7_6_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, v8
+; GFX900-NEXT: v_mov_b32_e32 v11, v9
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:9]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: v_mov_b32_e32 v4, v8
-; GFX900-NEXT: v_mov_b32_e32 v5, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v4
+; GFX900-NEXT: v_mov_b32_e32 v9, v5
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2623,18 +2518,19 @@ define void @v_shuffle_v4i64_v4i64__7_6_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, v8
+; GFX90A-NEXT: v_mov_b32_e32 v11, v9
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:9]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: v_mov_b32_e32 v4, v8
-; GFX90A-NEXT: v_mov_b32_e32 v5, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v4
+; GFX90A-NEXT: v_mov_b32_e32 v9, v5
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2642,19 +2538,19 @@ define void @v_shuffle_v4i64_v4i64__7_6_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, v8
+; GFX942-NEXT: v_mov_b32_e32 v11, v9
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:9]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: v_mov_b32_e32 v4, v8
-; GFX942-NEXT: v_mov_b32_e32 v5, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v4
+; GFX942-NEXT: v_mov_b32_e32 v9, v5
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -2669,18 +2565,19 @@ define void @v_shuffle_v4i64_v4i64__7_7_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, v8
+; GFX900-NEXT: v_mov_b32_e32 v11, v9
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:9]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: v_mov_b32_e32 v6, v8
-; GFX900-NEXT: v_mov_b32_e32 v7, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2688,18 +2585,19 @@ define void @v_shuffle_v4i64_v4i64__7_7_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, v8
+; GFX90A-NEXT: v_mov_b32_e32 v11, v9
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:9]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: v_mov_b32_e32 v6, v8
-; GFX90A-NEXT: v_mov_b32_e32 v7, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2707,19 +2605,19 @@ define void @v_shuffle_v4i64_v4i64__7_7_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, v8
+; GFX942-NEXT: v_mov_b32_e32 v11, v9
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:9]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: v_mov_b32_e32 v6, v8
-; GFX942-NEXT: v_mov_b32_e32 v7, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -2733,19 +2631,18 @@ define void @v_shuffle_v4i64_v4i64__7_7_u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v4i64__7_7_u_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, v0
-; GFX900-NEXT: v_mov_b32_e32 v9, v1
+; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2753,18 +2650,16 @@ define void @v_shuffle_v4i64_v4i64__7_7_u_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:9]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: v_mov_b32_e32 v6, v8
-; GFX90A-NEXT: v_mov_b32_e32 v7, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v12, v10
+; GFX90A-NEXT: v_mov_b32_e32 v13, v11
+; GFX90A-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v0, v[10:13], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2772,19 +2667,16 @@ define void @v_shuffle_v4i64_v4i64__7_7_u_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:9]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: v_mov_b32_e32 v6, v8
-; GFX942-NEXT: v_mov_b32_e32 v7, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v0, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v12, v10
+; GFX942-NEXT: v_mov_b32_e32 v13, v11
+; GFX942-NEXT: global_store_dwordx4 v0, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -2799,18 +2691,18 @@ define void @v_shuffle_v4i64_v4i64__7_7_1_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: v_mov_b32_e32 v8, v10
-; GFX900-NEXT: v_mov_b32_e32 v9, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v12, v8
+; GFX900-NEXT: v_mov_b32_e32 v13, v9
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v16, v[10:13], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2818,18 +2710,18 @@ define void @v_shuffle_v4i64_v4i64__7_7_1_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: v_mov_b32_e32 v8, v10
-; GFX90A-NEXT: v_mov_b32_e32 v9, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v12, v8
+; GFX90A-NEXT: v_mov_b32_e32 v13, v9
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v16, v[10:13], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2837,19 +2729,18 @@ define void @v_shuffle_v4i64_v4i64__7_7_1_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: v_mov_b32_e32 v8, v10
-; GFX942-NEXT: v_mov_b32_e32 v9, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v12, v8
+; GFX942-NEXT: v_mov_b32_e32 v13, v9
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -2864,18 +2755,18 @@ define void @v_shuffle_v4i64_v4i64__7_7_2_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v0
-; GFX900-NEXT: v_mov_b32_e32 v7, v1
-; GFX900-NEXT: v_mov_b32_e32 v10, v12
-; GFX900-NEXT: v_mov_b32_e32 v11, v13
-; GFX900-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v14, v8
+; GFX900-NEXT: v_mov_b32_e32 v15, v9
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2883,18 +2774,18 @@ define void @v_shuffle_v4i64_v4i64__7_7_2_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: v_mov_b32_e32 v7, v1
-; GFX90A-NEXT: v_mov_b32_e32 v10, v12
-; GFX90A-NEXT: v_mov_b32_e32 v11, v13
-; GFX90A-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v14, v8
+; GFX90A-NEXT: v_mov_b32_e32 v15, v9
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2902,19 +2793,18 @@ define void @v_shuffle_v4i64_v4i64__7_7_2_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
-; GFX942-NEXT: v_mov_b32_e32 v10, v12
-; GFX942-NEXT: v_mov_b32_e32 v11, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v14, v8
+; GFX942-NEXT: v_mov_b32_e32 v15, v9
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -2928,21 +2818,20 @@ define void @v_shuffle_v4i64_v4i64__7_7_3_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v4i64__7_7_3_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, v6
-; GFX900-NEXT: v_mov_b32_e32 v9, v7
-; GFX900-NEXT: v_mov_b32_e32 v10, v0
-; GFX900-NEXT: v_mov_b32_e32 v11, v1
+; GFX900-NEXT: v_mov_b32_e32 v8, v0
+; GFX900-NEXT: v_mov_b32_e32 v9, v1
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2950,20 +2839,18 @@ define void @v_shuffle_v4i64_v4i64__7_7_3_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v18, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: v_mov_b32_e32 v12, v14
-; GFX90A-NEXT: v_mov_b32_e32 v13, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v0
+; GFX90A-NEXT: v_mov_b32_e32 v9, v1
+; GFX90A-NEXT: v_mov_b32_e32 v16, v14
+; GFX90A-NEXT: v_mov_b32_e32 v17, v15
+; GFX90A-NEXT: global_store_dwordx4 v18, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v18, v[14:17], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2971,20 +2858,18 @@ define void @v_shuffle_v4i64_v4i64__7_7_3_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v18, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: v_mov_b32_e32 v12, v14
-; GFX942-NEXT: v_mov_b32_e32 v13, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v16, v14
+; GFX942-NEXT: v_mov_b32_e32 v8, v0
+; GFX942-NEXT: v_mov_b32_e32 v9, v1
+; GFX942-NEXT: v_mov_b32_e32 v17, v15
+; GFX942-NEXT: global_store_dwordx4 v18, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v18, v[14:17], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -2999,18 +2884,18 @@ define void @v_shuffle_v4i64_v4i64__7_7_4_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:9]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: v_mov_b32_e32 v6, v8
-; GFX900-NEXT: v_mov_b32_e32 v7, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v8
+; GFX900-NEXT: v_mov_b32_e32 v3, v9
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3018,18 +2903,18 @@ define void @v_shuffle_v4i64_v4i64__7_7_4_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:9]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: v_mov_b32_e32 v6, v8
-; GFX90A-NEXT: v_mov_b32_e32 v7, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v8
+; GFX90A-NEXT: v_mov_b32_e32 v3, v9
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3037,19 +2922,18 @@ define void @v_shuffle_v4i64_v4i64__7_7_4_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:9]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: v_mov_b32_e32 v6, v8
-; GFX942-NEXT: v_mov_b32_e32 v7, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v8
+; GFX942-NEXT: v_mov_b32_e32 v3, v9
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -3064,19 +2948,18 @@ define void @v_shuffle_v4i64_v4i64__7_7_5_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:9]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v0
-; GFX900-NEXT: v_mov_b32_e32 v7, v1
-; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v8
-; GFX900-NEXT: v_mov_b32_e32 v7, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v8
+; GFX900-NEXT: v_mov_b32_e32 v5, v9
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v16, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3084,19 +2967,18 @@ define void @v_shuffle_v4i64_v4i64__7_7_5_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:9]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: v_mov_b32_e32 v7, v1
-; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v8
-; GFX90A-NEXT: v_mov_b32_e32 v7, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v8
+; GFX90A-NEXT: v_mov_b32_e32 v5, v9
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v16, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3104,20 +2986,18 @@ define void @v_shuffle_v4i64_v4i64__7_7_5_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:9]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
-; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v6, v8
-; GFX942-NEXT: v_mov_b32_e32 v7, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v8
+; GFX942-NEXT: v_mov_b32_e32 v5, v9
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -3132,20 +3012,18 @@ define void @v_shuffle_v4i64_v4i64__7_7_6_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, v0
-; GFX900-NEXT: v_mov_b32_e32 v11, v1
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v8, v4
-; GFX900-NEXT: v_mov_b32_e32 v9, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, v8
+; GFX900-NEXT: v_mov_b32_e32 v1, v9
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
+; GFX900-NEXT: v_mov_b32_e32 v12, v10
+; GFX900-NEXT: v_mov_b32_e32 v13, v11
+; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v4, v[10:13], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3153,20 +3031,18 @@ define void @v_shuffle_v4i64_v4i64__7_7_6_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:9]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: v_mov_b32_e32 v6, v8
-; GFX90A-NEXT: v_mov_b32_e32 v7, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v8
+; GFX90A-NEXT: v_mov_b32_e32 v1, v9
+; GFX90A-NEXT: v_mov_b32_e32 v12, v10
+; GFX90A-NEXT: v_mov_b32_e32 v13, v11
+; GFX90A-NEXT: global_store_dwordx4 v14, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3174,26 +3050,24 @@ define void @v_shuffle_v4i64_v4i64__7_7_6_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:9]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: v_mov_b32_e32 v6, v8
-; GFX942-NEXT: v_mov_b32_e32 v7, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
-; GFX942-NEXT: s_waitcnt vmcnt(0)
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=v"()
- %vec1 = call <4 x i64> asm "; def $0", "=v"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 0>
+; GFX942-NEXT: v_mov_b32_e32 v0, v8
+; GFX942-NEXT: v_mov_b32_e32 v1, v9
+; GFX942-NEXT: v_mov_b32_e32 v12, v10
+; GFX942-NEXT: v_mov_b32_e32 v13, v11
+; GFX942-NEXT: global_store_dwordx4 v14, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1]
+; GFX942-NEXT: s_waitcnt vmcnt(0)
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=v"()
+ %vec1 = call <4 x i64> asm "; def $0", "=v"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 0>
store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
ret void
}
@@ -3403,13 +3277,13 @@ define void @v_shuffle_v4i64_v4i64__3_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: v_mov_b32_e32 v4, v2
; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v2
+; GFX900-NEXT: v_mov_b32_e32 v9, v3
+; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3419,13 +3293,13 @@ define void @v_shuffle_v4i64_v4i64__3_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: v_mov_b32_e32 v4, v2
; GFX90A-NEXT: v_mov_b32_e32 v5, v3
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v2
+; GFX90A-NEXT: v_mov_b32_e32 v9, v3
+; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3435,13 +3309,13 @@ define void @v_shuffle_v4i64_v4i64__3_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: v_mov_b32_e32 v4, v2
; GFX942-NEXT: v_mov_b32_e32 v5, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v2
+; GFX942-NEXT: v_mov_b32_e32 v9, v3
+; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -3633,18 +3507,16 @@ define void @v_shuffle_v4i64_v4i64__7_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v2
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: v_mov_b32_e32 v0, v10
-; GFX900-NEXT: v_mov_b32_e32 v1, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v10, v8
+; GFX900-NEXT: v_mov_b32_e32 v11, v9
+; GFX900-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3652,18 +3524,16 @@ define void @v_shuffle_v4i64_v4i64__7_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v3
-; GFX90A-NEXT: v_mov_b32_e32 v0, v10
-; GFX90A-NEXT: v_mov_b32_e32 v1, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v10, v8
+; GFX90A-NEXT: v_mov_b32_e32 v11, v9
+; GFX90A-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3671,19 +3541,16 @@ define void @v_shuffle_v4i64_v4i64__7_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, v10
-; GFX942-NEXT: v_mov_b32_e32 v1, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v10, v8
+; GFX942-NEXT: v_mov_b32_e32 v11, v9
+; GFX942-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -3698,18 +3565,16 @@ define void @v_shuffle_v4i64_v4i64__7_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v2
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: v_mov_b32_e32 v0, v10
-; GFX900-NEXT: v_mov_b32_e32 v1, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v10, v8
+; GFX900-NEXT: v_mov_b32_e32 v11, v9
+; GFX900-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3717,18 +3582,16 @@ define void @v_shuffle_v4i64_v4i64__7_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v3
-; GFX90A-NEXT: v_mov_b32_e32 v0, v10
-; GFX90A-NEXT: v_mov_b32_e32 v1, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v10, v8
+; GFX90A-NEXT: v_mov_b32_e32 v11, v9
+; GFX90A-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3736,19 +3599,16 @@ define void @v_shuffle_v4i64_v4i64__7_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, v10
-; GFX942-NEXT: v_mov_b32_e32 v1, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v10, v8
+; GFX942-NEXT: v_mov_b32_e32 v11, v9
+; GFX942-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -3763,20 +3623,16 @@ define void @v_shuffle_v4i64_v4i64__7_0_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, v6
-; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v12, v10
+; GFX900-NEXT: v_mov_b32_e32 v13, v11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v2
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: v_mov_b32_e32 v10, v0
-; GFX900-NEXT: v_mov_b32_e32 v11, v1
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v16, v[10:13], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3784,21 +3640,16 @@ define void @v_shuffle_v4i64_v4i64__7_0_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
+; GFX90A-NEXT: v_mov_b32_e32 v12, v10
+; GFX90A-NEXT: v_mov_b32_e32 v13, v11
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v3
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v10
-; GFX90A-NEXT: v_mov_b32_e32 v3, v11
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v16, v[10:13], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3806,22 +3657,16 @@ define void @v_shuffle_v4i64_v4i64__7_0_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, v10
+; GFX942-NEXT: v_mov_b32_e32 v13, v11
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v3
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v10
-; GFX942-NEXT: v_mov_b32_e32 v3, v11
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -3836,19 +3681,19 @@ define void @v_shuffle_v4i64_v4i64__7_2_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v10
+; GFX900-NEXT: v_mov_b32_e32 v9, v11
+; GFX900-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v14, v[0:3], s[16:17] offset:16
; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v12
-; GFX900-NEXT: v_mov_b32_e32 v3, v13
-; GFX900-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v12
+; GFX900-NEXT: v_mov_b32_e32 v9, v13
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3856,19 +3701,19 @@ define void @v_shuffle_v4i64_v4i64__7_2_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v10
+; GFX90A-NEXT: v_mov_b32_e32 v9, v11
+; GFX90A-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v14, v[0:3], s[16:17] offset:16
; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v12
-; GFX90A-NEXT: v_mov_b32_e32 v3, v13
-; GFX90A-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v12
+; GFX90A-NEXT: v_mov_b32_e32 v9, v13
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3876,19 +3721,19 @@ define void @v_shuffle_v4i64_v4i64__7_2_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v10
+; GFX942-NEXT: v_mov_b32_e32 v9, v11
+; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v14, v[0:3], s[0:1] offset:16
; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v12
-; GFX942-NEXT: v_mov_b32_e32 v3, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v12
+; GFX942-NEXT: v_mov_b32_e32 v9, v13
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -3903,19 +3748,19 @@ define void @v_shuffle_v4i64_v4i64__7_3_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v2
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, v8
+; GFX900-NEXT: v_mov_b32_e32 v11, v9
+; GFX900-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17] offset:16
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v16, v[2:5], s[16:17] offset:16
; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v14
-; GFX900-NEXT: v_mov_b32_e32 v5, v15
-; GFX900-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v12
+; GFX900-NEXT: v_mov_b32_e32 v9, v13
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3923,19 +3768,19 @@ define void @v_shuffle_v4i64_v4i64__7_3_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, v8
+; GFX90A-NEXT: v_mov_b32_e32 v11, v9
+; GFX90A-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17] offset:16
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v16, v[2:5], s[16:17] offset:16
; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v14
-; GFX90A-NEXT: v_mov_b32_e32 v5, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v12
+; GFX90A-NEXT: v_mov_b32_e32 v9, v13
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3943,19 +3788,19 @@ define void @v_shuffle_v4i64_v4i64__7_3_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, v8
+; GFX942-NEXT: v_mov_b32_e32 v11, v9
+; GFX942-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] offset:16
; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v4, v14
-; GFX942-NEXT: v_mov_b32_e32 v5, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v12
+; GFX942-NEXT: v_mov_b32_e32 v9, v13
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -3970,20 +3815,19 @@ define void @v_shuffle_v4i64_v4i64__7_4_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v14, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v2
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: v_mov_b32_e32 v10, v8
+; GFX900-NEXT: v_mov_b32_e32 v11, v9
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v0, v12
-; GFX900-NEXT: v_mov_b32_e32 v1, v13
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: global_store_dwordx4 v14, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v0
+; GFX900-NEXT: v_mov_b32_e32 v9, v1
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3991,20 +3835,19 @@ define void @v_shuffle_v4i64_v4i64__7_4_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v14, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: v_mov_b32_e32 v10, v8
+; GFX90A-NEXT: v_mov_b32_e32 v11, v9
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v0, v12
-; GFX90A-NEXT: v_mov_b32_e32 v1, v13
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: global_store_dwordx4 v14, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v0
+; GFX90A-NEXT: v_mov_b32_e32 v9, v1
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4012,20 +3855,19 @@ define void @v_shuffle_v4i64_v4i64__7_4_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v14, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: v_mov_b32_e32 v10, v8
+; GFX942-NEXT: v_mov_b32_e32 v11, v9
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v0, v12
-; GFX942-NEXT: v_mov_b32_e32 v1, v13
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: global_store_dwordx4 v14, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v0
+; GFX942-NEXT: v_mov_b32_e32 v9, v1
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -4040,19 +3882,19 @@ define void @v_shuffle_v4i64_v4i64__7_5_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, v8
+; GFX900-NEXT: v_mov_b32_e32 v11, v9
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v2
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17] offset:16
; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v10
-; GFX900-NEXT: v_mov_b32_e32 v5, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v2
+; GFX900-NEXT: v_mov_b32_e32 v9, v3
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4060,19 +3902,19 @@ define void @v_shuffle_v4i64_v4i64__7_5_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, v8
+; GFX90A-NEXT: v_mov_b32_e32 v11, v9
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v3
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17] offset:16
; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v10
-; GFX90A-NEXT: v_mov_b32_e32 v5, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v2
+; GFX90A-NEXT: v_mov_b32_e32 v9, v3
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4080,20 +3922,19 @@ define void @v_shuffle_v4i64_v4i64__7_5_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, v8
+; GFX942-NEXT: v_mov_b32_e32 v11, v9
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v3
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] offset:16
; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v4, v10
-; GFX942-NEXT: v_mov_b32_e32 v5, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v2
+; GFX942-NEXT: v_mov_b32_e32 v9, v3
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -4108,18 +3949,19 @@ define void @v_shuffle_v4i64_v4i64__7_6_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, v8
+; GFX900-NEXT: v_mov_b32_e32 v11, v9
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v2
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: v_mov_b32_e32 v6, v10
-; GFX900-NEXT: v_mov_b32_e32 v7, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v4
+; GFX900-NEXT: v_mov_b32_e32 v9, v5
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4127,18 +3969,19 @@ define void @v_shuffle_v4i64_v4i64__7_6_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, v8
+; GFX90A-NEXT: v_mov_b32_e32 v11, v9
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v3
-; GFX90A-NEXT: v_mov_b32_e32 v6, v10
-; GFX90A-NEXT: v_mov_b32_e32 v7, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v4
+; GFX90A-NEXT: v_mov_b32_e32 v9, v5
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4146,19 +3989,19 @@ define void @v_shuffle_v4i64_v4i64__7_6_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, v8
+; GFX942-NEXT: v_mov_b32_e32 v11, v9
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v3
-; GFX942-NEXT: v_mov_b32_e32 v6, v10
-; GFX942-NEXT: v_mov_b32_e32 v7, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v4
+; GFX942-NEXT: v_mov_b32_e32 v9, v5
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -4173,18 +4016,19 @@ define void @v_shuffle_v4i64_v4i64__7_7_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, v8
+; GFX900-NEXT: v_mov_b32_e32 v11, v9
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v2
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: v_mov_b32_e32 v8, v10
-; GFX900-NEXT: v_mov_b32_e32 v9, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4192,18 +4036,19 @@ define void @v_shuffle_v4i64_v4i64__7_7_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, v8
+; GFX90A-NEXT: v_mov_b32_e32 v11, v9
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v3
-; GFX90A-NEXT: v_mov_b32_e32 v8, v10
-; GFX90A-NEXT: v_mov_b32_e32 v9, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4211,19 +4056,19 @@ define void @v_shuffle_v4i64_v4i64__7_7_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, v8
+; GFX942-NEXT: v_mov_b32_e32 v11, v9
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v3
-; GFX942-NEXT: v_mov_b32_e32 v8, v10
-; GFX942-NEXT: v_mov_b32_e32 v9, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -4237,51 +4082,55 @@ define void @v_shuffle_v4i64_v4i64__7_7_u_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v4i64__7_7_u_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v8, v10
-; GFX900-NEXT: v_mov_b32_e32 v9, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v4i64__7_7_u_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v10
-; GFX90A-NEXT: v_mov_b32_e32 v9, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_u_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v8, v10
-; GFX942-NEXT: v_mov_b32_e32 v9, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -4295,51 +4144,54 @@ define void @v_shuffle_v4i64_v4i64__7_7_0_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v4i64__7_7_0_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v8, v10
-; GFX900-NEXT: v_mov_b32_e32 v9, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v4i64__7_7_0_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v10
-; GFX90A-NEXT: v_mov_b32_e32 v9, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_0_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v8, v10
-; GFX942-NEXT: v_mov_b32_e32 v9, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -4354,18 +4206,18 @@ define void @v_shuffle_v4i64_v4i64__7_7_2_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v14, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v2
-; GFX900-NEXT: v_mov_b32_e32 v7, v3
-; GFX900-NEXT: v_mov_b32_e32 v10, v12
-; GFX900-NEXT: v_mov_b32_e32 v11, v13
-; GFX900-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v12, v8
+; GFX900-NEXT: v_mov_b32_e32 v13, v9
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4373,18 +4225,18 @@ define void @v_shuffle_v4i64_v4i64__7_7_2_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v14, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: v_mov_b32_e32 v10, v12
-; GFX90A-NEXT: v_mov_b32_e32 v11, v13
-; GFX90A-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v12, v8
+; GFX90A-NEXT: v_mov_b32_e32 v13, v9
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4392,19 +4244,18 @@ define void @v_shuffle_v4i64_v4i64__7_7_2_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: v_mov_b32_e32 v10, v12
-; GFX942-NEXT: v_mov_b32_e32 v11, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v12, v8
+; GFX942-NEXT: v_mov_b32_e32 v13, v9
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -4418,19 +4269,20 @@ define void @v_shuffle_v4i64_v4i64__7_7_3_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v4i64__7_7_3_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, v2
+; GFX900-NEXT: v_mov_b32_e32 v9, v3
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v12, v14
-; GFX900-NEXT: v_mov_b32_e32 v13, v15
-; GFX900-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4438,18 +4290,18 @@ define void @v_shuffle_v4i64_v4i64__7_7_3_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v18, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: v_mov_b32_e32 v12, v14
-; GFX90A-NEXT: v_mov_b32_e32 v13, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v2
+; GFX90A-NEXT: v_mov_b32_e32 v9, v3
+; GFX90A-NEXT: v_mov_b32_e32 v16, v14
+; GFX90A-NEXT: v_mov_b32_e32 v17, v15
+; GFX90A-NEXT: global_store_dwordx4 v18, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v18, v[14:17], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4457,18 +4309,18 @@ define void @v_shuffle_v4i64_v4i64__7_7_3_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v18, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: v_mov_b32_e32 v12, v14
-; GFX942-NEXT: v_mov_b32_e32 v13, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v16, v14
+; GFX942-NEXT: v_mov_b32_e32 v8, v2
+; GFX942-NEXT: v_mov_b32_e32 v9, v3
+; GFX942-NEXT: v_mov_b32_e32 v17, v15
+; GFX942-NEXT: global_store_dwordx4 v18, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v18, v[14:17], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -4483,18 +4335,18 @@ define void @v_shuffle_v4i64_v4i64__7_7_4_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v2
-; GFX900-NEXT: v_mov_b32_e32 v7, v3
-; GFX900-NEXT: v_mov_b32_e32 v8, v10
-; GFX900-NEXT: v_mov_b32_e32 v9, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v8
+; GFX900-NEXT: v_mov_b32_e32 v3, v9
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v14, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4502,18 +4354,18 @@ define void @v_shuffle_v4i64_v4i64__7_7_4_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: v_mov_b32_e32 v8, v10
-; GFX90A-NEXT: v_mov_b32_e32 v9, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v8
+; GFX90A-NEXT: v_mov_b32_e32 v3, v9
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v14, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4521,19 +4373,19 @@ define void @v_shuffle_v4i64_v4i64__7_7_4_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: v_mov_b32_e32 v8, v10
-; GFX942-NEXT: v_mov_b32_e32 v9, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v8
+; GFX942-NEXT: v_mov_b32_e32 v3, v9
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v14, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -4548,19 +4400,18 @@ define void @v_shuffle_v4i64_v4i64__7_7_5_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v8, v2
-; GFX900-NEXT: v_mov_b32_e32 v9, v3
-; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v8, v10
-; GFX900-NEXT: v_mov_b32_e32 v9, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v8
+; GFX900-NEXT: v_mov_b32_e32 v5, v9
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4568,19 +4419,18 @@ define void @v_shuffle_v4i64_v4i64__7_7_5_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v2
-; GFX90A-NEXT: v_mov_b32_e32 v9, v3
-; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v10
-; GFX90A-NEXT: v_mov_b32_e32 v9, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v8
+; GFX90A-NEXT: v_mov_b32_e32 v5, v9
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4588,20 +4438,19 @@ define void @v_shuffle_v4i64_v4i64__7_7_5_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v2
-; GFX942-NEXT: v_mov_b32_e32 v9, v3
-; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v8, v10
-; GFX942-NEXT: v_mov_b32_e32 v9, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v8
+; GFX942-NEXT: v_mov_b32_e32 v5, v9
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -4616,18 +4465,19 @@ define void @v_shuffle_v4i64_v4i64__7_7_6_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v8
-; GFX900-NEXT: v_mov_b32_e32 v1, v9
-; GFX900-NEXT: v_mov_b32_e32 v8, v10
-; GFX900-NEXT: v_mov_b32_e32 v9, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v4
+; GFX900-NEXT: v_mov_b32_e32 v9, v5
+; GFX900-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4635,18 +4485,19 @@ define void @v_shuffle_v4i64_v4i64__7_7_6_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v8
-; GFX90A-NEXT: v_mov_b32_e32 v1, v9
-; GFX90A-NEXT: v_mov_b32_e32 v8, v10
-; GFX90A-NEXT: v_mov_b32_e32 v9, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v4
+; GFX90A-NEXT: v_mov_b32_e32 v9, v5
+; GFX90A-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4654,19 +4505,20 @@ define void @v_shuffle_v4i64_v4i64__7_7_6_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v8
-; GFX942-NEXT: v_mov_b32_e32 v1, v9
-; GFX942-NEXT: v_mov_b32_e32 v8, v10
-; GFX942-NEXT: v_mov_b32_e32 v9, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v4
+; GFX942-NEXT: v_mov_b32_e32 v9, v5
+; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -4881,14 +4733,13 @@ define void @v_shuffle_v4i64_v4i64__3_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v4
; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v4
+; GFX900-NEXT: v_mov_b32_e32 v9, v5
+; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4898,14 +4749,13 @@ define void @v_shuffle_v4i64_v4i64__3_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v4
; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v4
+; GFX90A-NEXT: v_mov_b32_e32 v9, v5
+; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4915,14 +4765,13 @@ define void @v_shuffle_v4i64_v4i64__3_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v4
; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v4
+; GFX942-NEXT: v_mov_b32_e32 v9, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -5114,18 +4963,16 @@ define void @v_shuffle_v4i64_v4i64__7_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v4
-; GFX900-NEXT: v_mov_b32_e32 v7, v5
-; GFX900-NEXT: v_mov_b32_e32 v2, v12
-; GFX900-NEXT: v_mov_b32_e32 v3, v13
-; GFX900-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v10, v8
+; GFX900-NEXT: v_mov_b32_e32 v11, v9
+; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5133,18 +4980,16 @@ define void @v_shuffle_v4i64_v4i64__7_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v5
-; GFX90A-NEXT: v_mov_b32_e32 v2, v12
-; GFX90A-NEXT: v_mov_b32_e32 v3, v13
-; GFX90A-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v10, v8
+; GFX90A-NEXT: v_mov_b32_e32 v11, v9
+; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5152,19 +4997,16 @@ define void @v_shuffle_v4i64_v4i64__7_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v5
-; GFX942-NEXT: v_mov_b32_e32 v2, v12
-; GFX942-NEXT: v_mov_b32_e32 v3, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v10, v8
+; GFX942-NEXT: v_mov_b32_e32 v11, v9
+; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -5179,18 +5021,16 @@ define void @v_shuffle_v4i64_v4i64__7_u_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v4
-; GFX900-NEXT: v_mov_b32_e32 v7, v5
-; GFX900-NEXT: v_mov_b32_e32 v0, v12
-; GFX900-NEXT: v_mov_b32_e32 v1, v13
-; GFX900-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v14, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v10, v8
+; GFX900-NEXT: v_mov_b32_e32 v11, v9
+; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5198,18 +5038,16 @@ define void @v_shuffle_v4i64_v4i64__7_u_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v5
-; GFX90A-NEXT: v_mov_b32_e32 v0, v12
-; GFX90A-NEXT: v_mov_b32_e32 v1, v13
-; GFX90A-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v14, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v10, v8
+; GFX90A-NEXT: v_mov_b32_e32 v11, v9
+; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5217,19 +5055,16 @@ define void @v_shuffle_v4i64_v4i64__7_u_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v5
-; GFX942-NEXT: v_mov_b32_e32 v0, v12
-; GFX942-NEXT: v_mov_b32_e32 v1, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v14, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v10, v8
+; GFX942-NEXT: v_mov_b32_e32 v11, v9
+; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -5244,20 +5079,16 @@ define void @v_shuffle_v4i64_v4i64__7_0_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, v6
-; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v14, v12
+; GFX900-NEXT: v_mov_b32_e32 v15, v13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v4
-; GFX900-NEXT: v_mov_b32_e32 v7, v5
-; GFX900-NEXT: v_mov_b32_e32 v10, v0
-; GFX900-NEXT: v_mov_b32_e32 v11, v1
-; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5265,20 +5096,16 @@ define void @v_shuffle_v4i64_v4i64__7_0_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
+; GFX90A-NEXT: v_mov_b32_e32 v14, v12
+; GFX90A-NEXT: v_mov_b32_e32 v15, v13
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v5
-; GFX90A-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v2, v12
-; GFX90A-NEXT: v_mov_b32_e32 v3, v13
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5286,21 +5113,16 @@ define void @v_shuffle_v4i64_v4i64__7_0_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: v_mov_b32_e32 v14, v12
+; GFX942-NEXT: v_mov_b32_e32 v15, v13
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v5
-; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v12
-; GFX942-NEXT: v_mov_b32_e32 v3, v13
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -5315,18 +5137,16 @@ define void @v_shuffle_v4i64_v4i64__7_1_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v14, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v4
-; GFX900-NEXT: v_mov_b32_e32 v7, v5
-; GFX900-NEXT: v_mov_b32_e32 v0, v12
-; GFX900-NEXT: v_mov_b32_e32 v1, v13
-; GFX900-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v14, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v12, v10
+; GFX900-NEXT: v_mov_b32_e32 v13, v11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5334,18 +5154,16 @@ define void @v_shuffle_v4i64_v4i64__7_1_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v14, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v5
-; GFX90A-NEXT: v_mov_b32_e32 v0, v12
-; GFX90A-NEXT: v_mov_b32_e32 v1, v13
-; GFX90A-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v14, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v12, v10
+; GFX90A-NEXT: v_mov_b32_e32 v13, v11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5353,19 +5171,16 @@ define void @v_shuffle_v4i64_v4i64__7_1_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, v10
+; GFX942-NEXT: v_mov_b32_e32 v13, v11
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v5
-; GFX942-NEXT: v_mov_b32_e32 v0, v12
-; GFX942-NEXT: v_mov_b32_e32 v1, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v14, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -5380,19 +5195,19 @@ define void @v_shuffle_v4i64_v4i64__7_3_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v10
+; GFX900-NEXT: v_mov_b32_e32 v9, v11
+; GFX900-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17] offset:16
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v16, v[2:5], s[16:17] offset:16
; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v14
-; GFX900-NEXT: v_mov_b32_e32 v5, v15
-; GFX900-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v12
+; GFX900-NEXT: v_mov_b32_e32 v9, v13
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5400,19 +5215,19 @@ define void @v_shuffle_v4i64_v4i64__7_3_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v10
+; GFX90A-NEXT: v_mov_b32_e32 v9, v11
+; GFX90A-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17] offset:16
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v16, v[2:5], s[16:17] offset:16
; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v14
-; GFX90A-NEXT: v_mov_b32_e32 v5, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v12
+; GFX90A-NEXT: v_mov_b32_e32 v9, v13
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5420,19 +5235,19 @@ define void @v_shuffle_v4i64_v4i64__7_3_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v10
+; GFX942-NEXT: v_mov_b32_e32 v9, v11
+; GFX942-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] offset:16
; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v4, v14
-; GFX942-NEXT: v_mov_b32_e32 v5, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v12
+; GFX942-NEXT: v_mov_b32_e32 v9, v13
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -5447,20 +5262,19 @@ define void @v_shuffle_v4i64_v4i64__7_4_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, v8
+; GFX900-NEXT: v_mov_b32_e32 v11, v9
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, v4
-; GFX900-NEXT: v_mov_b32_e32 v7, v5
-; GFX900-NEXT: v_mov_b32_e32 v0, v14
-; GFX900-NEXT: v_mov_b32_e32 v1, v15
-; GFX900-NEXT: v_mov_b32_e32 v2, v8
-; GFX900-NEXT: v_mov_b32_e32 v3, v9
-; GFX900-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v0
+; GFX900-NEXT: v_mov_b32_e32 v9, v1
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5468,20 +5282,19 @@ define void @v_shuffle_v4i64_v4i64__7_4_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, v8
+; GFX90A-NEXT: v_mov_b32_e32 v11, v9
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v5
-; GFX90A-NEXT: v_mov_b32_e32 v0, v14
-; GFX90A-NEXT: v_mov_b32_e32 v1, v15
-; GFX90A-NEXT: v_mov_b32_e32 v2, v8
-; GFX90A-NEXT: v_mov_b32_e32 v3, v9
-; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v0
+; GFX90A-NEXT: v_mov_b32_e32 v9, v1
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5489,20 +5302,19 @@ define void @v_shuffle_v4i64_v4i64__7_4_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, v8
+; GFX942-NEXT: v_mov_b32_e32 v11, v9
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v5
-; GFX942-NEXT: v_mov_b32_e32 v0, v14
-; GFX942-NEXT: v_mov_b32_e32 v1, v15
-; GFX942-NEXT: v_mov_b32_e32 v2, v8
-; GFX942-NEXT: v_mov_b32_e32 v3, v9
-; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v0
+; GFX942-NEXT: v_mov_b32_e32 v9, v1
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -5517,19 +5329,19 @@ define void @v_shuffle_v4i64_v4i64__7_5_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, v8
+; GFX900-NEXT: v_mov_b32_e32 v11, v9
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v4
-; GFX900-NEXT: v_mov_b32_e32 v7, v5
-; GFX900-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v12
-; GFX900-NEXT: v_mov_b32_e32 v7, v13
-; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v2
+; GFX900-NEXT: v_mov_b32_e32 v9, v3
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5537,19 +5349,19 @@ define void @v_shuffle_v4i64_v4i64__7_5_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, v8
+; GFX90A-NEXT: v_mov_b32_e32 v11, v9
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v5
-; GFX90A-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v12
-; GFX90A-NEXT: v_mov_b32_e32 v7, v13
-; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v2
+; GFX90A-NEXT: v_mov_b32_e32 v9, v3
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5557,20 +5369,19 @@ define void @v_shuffle_v4i64_v4i64__7_5_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, v8
+; GFX942-NEXT: v_mov_b32_e32 v11, v9
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v5
-; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16
; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v6, v12
-; GFX942-NEXT: v_mov_b32_e32 v7, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v2
+; GFX942-NEXT: v_mov_b32_e32 v9, v3
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -5585,18 +5396,19 @@ define void @v_shuffle_v4i64_v4i64__7_6_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, v8
+; GFX900-NEXT: v_mov_b32_e32 v11, v9
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v4
-; GFX900-NEXT: v_mov_b32_e32 v7, v5
-; GFX900-NEXT: v_mov_b32_e32 v8, v12
-; GFX900-NEXT: v_mov_b32_e32 v9, v13
-; GFX900-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v4
+; GFX900-NEXT: v_mov_b32_e32 v9, v5
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5604,18 +5416,19 @@ define void @v_shuffle_v4i64_v4i64__7_6_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, v8
+; GFX90A-NEXT: v_mov_b32_e32 v11, v9
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v5
-; GFX90A-NEXT: v_mov_b32_e32 v8, v12
-; GFX90A-NEXT: v_mov_b32_e32 v9, v13
-; GFX90A-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v4
+; GFX90A-NEXT: v_mov_b32_e32 v9, v5
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5623,19 +5436,19 @@ define void @v_shuffle_v4i64_v4i64__7_6_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, v8
+; GFX942-NEXT: v_mov_b32_e32 v11, v9
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v5
-; GFX942-NEXT: v_mov_b32_e32 v8, v12
-; GFX942-NEXT: v_mov_b32_e32 v9, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v4
+; GFX942-NEXT: v_mov_b32_e32 v9, v5
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -5650,18 +5463,19 @@ define void @v_shuffle_v4i64_v4i64__7_7_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, v8
+; GFX900-NEXT: v_mov_b32_e32 v11, v9
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v4
-; GFX900-NEXT: v_mov_b32_e32 v7, v5
-; GFX900-NEXT: v_mov_b32_e32 v10, v12
-; GFX900-NEXT: v_mov_b32_e32 v11, v13
-; GFX900-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5669,18 +5483,19 @@ define void @v_shuffle_v4i64_v4i64__7_7_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, v8
+; GFX90A-NEXT: v_mov_b32_e32 v11, v9
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v5
-; GFX90A-NEXT: v_mov_b32_e32 v10, v12
-; GFX90A-NEXT: v_mov_b32_e32 v11, v13
-; GFX90A-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5688,19 +5503,19 @@ define void @v_shuffle_v4i64_v4i64__7_7_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, v8
+; GFX942-NEXT: v_mov_b32_e32 v11, v9
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v5
-; GFX942-NEXT: v_mov_b32_e32 v10, v12
-; GFX942-NEXT: v_mov_b32_e32 v11, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -5714,51 +5529,55 @@ define void @v_shuffle_v4i64_v4i64__7_7_u_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v4i64__7_7_u_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
-; GFX900-NEXT: v_mov_b32_e32 v10, v12
-; GFX900-NEXT: v_mov_b32_e32 v11, v13
-; GFX900-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v4i64__7_7_u_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
-; GFX90A-NEXT: v_mov_b32_e32 v10, v12
-; GFX90A-NEXT: v_mov_b32_e32 v11, v13
-; GFX90A-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
-; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_u_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v10, v12
-; GFX942-NEXT: v_mov_b32_e32 v11, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -5773,18 +5592,19 @@ define void @v_shuffle_v4i64_v4i64__7_7_0_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: v_mov_b32_e32 v10, v12
; GFX900-NEXT: v_mov_b32_e32 v11, v13
-; GFX900-NEXT: global_store_dwordx4 v14, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5792,18 +5612,19 @@ define void @v_shuffle_v4i64_v4i64__7_7_0_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: v_mov_b32_e32 v10, v12
; GFX90A-NEXT: v_mov_b32_e32 v11, v13
-; GFX90A-NEXT: global_store_dwordx4 v14, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5811,18 +5632,19 @@ define void @v_shuffle_v4i64_v4i64__7_7_0_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: v_mov_b32_e32 v10, v12
; GFX942-NEXT: v_mov_b32_e32 v11, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -5836,51 +5658,54 @@ define void @v_shuffle_v4i64_v4i64__7_7_1_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v4i64__7_7_1_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
-; GFX900-NEXT: v_mov_b32_e32 v10, v12
-; GFX900-NEXT: v_mov_b32_e32 v11, v13
-; GFX900-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v4i64__7_7_1_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
-; GFX90A-NEXT: v_mov_b32_e32 v10, v12
-; GFX90A-NEXT: v_mov_b32_e32 v11, v13
-; GFX90A-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_1_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
-; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v10, v12
-; GFX942-NEXT: v_mov_b32_e32 v11, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -5894,19 +5719,20 @@ define void @v_shuffle_v4i64_v4i64__7_7_3_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v4i64__7_7_3_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, v4
+; GFX900-NEXT: v_mov_b32_e32 v9, v5
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: v_mov_b32_e32 v12, v14
-; GFX900-NEXT: v_mov_b32_e32 v13, v15
-; GFX900-NEXT: global_store_dwordx4 v16, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5914,18 +5740,18 @@ define void @v_shuffle_v4i64_v4i64__7_7_3_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v18, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v12, v14
-; GFX90A-NEXT: v_mov_b32_e32 v13, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v4
+; GFX90A-NEXT: v_mov_b32_e32 v9, v5
+; GFX90A-NEXT: v_mov_b32_e32 v16, v14
+; GFX90A-NEXT: v_mov_b32_e32 v17, v15
+; GFX90A-NEXT: global_store_dwordx4 v18, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v18, v[14:17], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5933,18 +5759,18 @@ define void @v_shuffle_v4i64_v4i64__7_7_3_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v18, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v12, v14
-; GFX942-NEXT: v_mov_b32_e32 v13, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v16, v14
+; GFX942-NEXT: v_mov_b32_e32 v8, v4
+; GFX942-NEXT: v_mov_b32_e32 v9, v5
+; GFX942-NEXT: v_mov_b32_e32 v17, v15
+; GFX942-NEXT: global_store_dwordx4 v18, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v18, v[14:17], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -5959,18 +5785,18 @@ define void @v_shuffle_v4i64_v4i64__7_7_4_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
-; GFX900-NEXT: v_mov_b32_e32 v8, v4
-; GFX900-NEXT: v_mov_b32_e32 v9, v5
-; GFX900-NEXT: v_mov_b32_e32 v10, v12
-; GFX900-NEXT: v_mov_b32_e32 v11, v13
-; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v8
+; GFX900-NEXT: v_mov_b32_e32 v3, v9
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5978,18 +5804,18 @@ define void @v_shuffle_v4i64_v4i64__7_7_4_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v4
-; GFX90A-NEXT: v_mov_b32_e32 v9, v5
-; GFX90A-NEXT: v_mov_b32_e32 v10, v12
-; GFX90A-NEXT: v_mov_b32_e32 v11, v13
-; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v8
+; GFX90A-NEXT: v_mov_b32_e32 v3, v9
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5997,19 +5823,19 @@ define void @v_shuffle_v4i64_v4i64__7_7_4_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v4
-; GFX942-NEXT: v_mov_b32_e32 v9, v5
-; GFX942-NEXT: v_mov_b32_e32 v10, v12
-; GFX942-NEXT: v_mov_b32_e32 v11, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v8
+; GFX942-NEXT: v_mov_b32_e32 v3, v9
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -6024,19 +5850,18 @@ define void @v_shuffle_v4i64_v4i64__7_7_5_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
-; GFX900-NEXT: v_mov_b32_e32 v10, v4
-; GFX900-NEXT: v_mov_b32_e32 v11, v5
-; GFX900-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v10, v12
-; GFX900-NEXT: v_mov_b32_e32 v11, v13
-; GFX900-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v8
+; GFX900-NEXT: v_mov_b32_e32 v5, v9
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6044,19 +5869,18 @@ define void @v_shuffle_v4i64_v4i64__7_7_5_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
-; GFX90A-NEXT: v_mov_b32_e32 v10, v4
-; GFX90A-NEXT: v_mov_b32_e32 v11, v5
-; GFX90A-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v10, v12
-; GFX90A-NEXT: v_mov_b32_e32 v11, v13
-; GFX90A-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v8
+; GFX90A-NEXT: v_mov_b32_e32 v5, v9
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6064,20 +5888,19 @@ define void @v_shuffle_v4i64_v4i64__7_7_5_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v4
-; GFX942-NEXT: v_mov_b32_e32 v11, v5
-; GFX942-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v10, v12
-; GFX942-NEXT: v_mov_b32_e32 v11, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v8
+; GFX942-NEXT: v_mov_b32_e32 v5, v9
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -6092,18 +5915,19 @@ define void @v_shuffle_v4i64_v4i64__7_7_6_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v14, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v10
-; GFX900-NEXT: v_mov_b32_e32 v3, v11
-; GFX900-NEXT: v_mov_b32_e32 v10, v12
-; GFX900-NEXT: v_mov_b32_e32 v11, v13
-; GFX900-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, v4
+; GFX900-NEXT: v_mov_b32_e32 v9, v5
+; GFX900-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6111,18 +5935,19 @@ define void @v_shuffle_v4i64_v4i64__7_7_6_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v14, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v10
-; GFX90A-NEXT: v_mov_b32_e32 v3, v11
-; GFX90A-NEXT: v_mov_b32_e32 v10, v12
-; GFX90A-NEXT: v_mov_b32_e32 v11, v13
-; GFX90A-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, v4
+; GFX90A-NEXT: v_mov_b32_e32 v9, v5
+; GFX90A-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6130,19 +5955,20 @@ define void @v_shuffle_v4i64_v4i64__7_7_6_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v10
-; GFX942-NEXT: v_mov_b32_e32 v3, v11
-; GFX942-NEXT: v_mov_b32_e32 v10, v12
-; GFX942-NEXT: v_mov_b32_e32 v11, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v4
+; GFX942-NEXT: v_mov_b32_e32 v9, v5
+; GFX942-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -6156,42 +5982,43 @@ define void @v_shuffle_v4i64_v4i64__u_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v4i64__u_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v4i64__u_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v4i64__u_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -6207,13 +6034,13 @@ define void @v_shuffle_v4i64_v4i64__0_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
; GFX900-NEXT: v_mov_b32_e32 v2, v6
; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6223,13 +6050,13 @@ define void @v_shuffle_v4i64_v4i64__0_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
; GFX90A-NEXT: v_mov_b32_e32 v2, v6
; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6239,13 +6066,13 @@ define void @v_shuffle_v4i64_v4i64__0_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
; GFX942-NEXT: v_mov_b32_e32 v2, v6
; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -6261,11 +6088,13 @@ define void @v_shuffle_v4i64_v4i64__1_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
; GFX900-NEXT: v_mov_b32_e32 v4, v6
; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6275,11 +6104,13 @@ define void @v_shuffle_v4i64_v4i64__1_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
; GFX90A-NEXT: v_mov_b32_e32 v4, v6
; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6289,11 +6120,13 @@ define void @v_shuffle_v4i64_v4i64__1_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
; GFX942-NEXT: v_mov_b32_e32 v4, v6
; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -6306,48 +6139,43 @@ define void @v_shuffle_v4i64_v4i64__2_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v4i64__2_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v4i64__2_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v4i64__2_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -6360,42 +6188,43 @@ define void @v_shuffle_v4i64_v4i64__3_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v4i64__3_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v4i64__3_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v4i64__3_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -6408,42 +6237,43 @@ define void @v_shuffle_v4i64_v4i64__4_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v4i64__4_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v4i64__4_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v4i64__4_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -6457,17 +6287,17 @@ define void @v_shuffle_v4i64_v4i64__5_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v16, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
; GFX900-NEXT: v_mov_b32_e32 v12, v6
; GFX900-NEXT: v_mov_b32_e32 v13, v7
-; GFX900-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17] offset:16
; GFX900-NEXT: global_store_dwordx4 v16, v[10:13], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
@@ -6476,17 +6306,17 @@ define void @v_shuffle_v4i64_v4i64__5_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
; GFX90A-NEXT: v_mov_b32_e32 v12, v6
; GFX90A-NEXT: v_mov_b32_e32 v13, v7
-; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17] offset:16
; GFX90A-NEXT: global_store_dwordx4 v16, v[10:13], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
@@ -6495,17 +6325,18 @@ define void @v_shuffle_v4i64_v4i64__5_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
; GFX942-NEXT: v_mov_b32_e32 v12, v6
; GFX942-NEXT: v_mov_b32_e32 v13, v7
-; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -6521,17 +6352,17 @@ define void @v_shuffle_v4i64_v4i64__6_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v16, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
; GFX900-NEXT: v_mov_b32_e32 v14, v6
; GFX900-NEXT: v_mov_b32_e32 v15, v7
-; GFX900-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17] offset:16
; GFX900-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
@@ -6540,17 +6371,17 @@ define void @v_shuffle_v4i64_v4i64__6_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
; GFX90A-NEXT: v_mov_b32_e32 v14, v6
; GFX90A-NEXT: v_mov_b32_e32 v15, v7
-; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17] offset:16
; GFX90A-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
@@ -6559,17 +6390,18 @@ define void @v_shuffle_v4i64_v4i64__6_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
; GFX942-NEXT: v_mov_b32_e32 v14, v6
; GFX942-NEXT: v_mov_b32_e32 v15, v7
-; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -6585,19 +6417,16 @@ define void @v_shuffle_v4i64_v4i64__7_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v14
-; GFX900-NEXT: v_mov_b32_e32 v5, v15
-; GFX900-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v10, v8
+; GFX900-NEXT: v_mov_b32_e32 v11, v9
+; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6605,19 +6434,16 @@ define void @v_shuffle_v4i64_v4i64__7_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v14
-; GFX90A-NEXT: v_mov_b32_e32 v5, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v10, v8
+; GFX90A-NEXT: v_mov_b32_e32 v11, v9
+; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6625,19 +6451,16 @@ define void @v_shuffle_v4i64_v4i64__7_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v4, v14
-; GFX942-NEXT: v_mov_b32_e32 v5, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v10, v8
+; GFX942-NEXT: v_mov_b32_e32 v11, v9
+; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -6651,19 +6474,18 @@ define void @v_shuffle_v4i64_v4i64__7_u_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v4i64__7_u_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: v_mov_b32_e32 v0, v14
-; GFX900-NEXT: v_mov_b32_e32 v1, v15
-; GFX900-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6671,18 +6493,16 @@ define void @v_shuffle_v4i64_v4i64__7_u_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: v_mov_b32_e32 v0, v14
-; GFX90A-NEXT: v_mov_b32_e32 v1, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v16, v[14:17], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6690,18 +6510,17 @@ define void @v_shuffle_v4i64_v4i64__7_u_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: v_mov_b32_e32 v0, v14
-; GFX942-NEXT: v_mov_b32_e32 v1, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v16, v[14:17], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -6718,60 +6537,49 @@ define void @v_shuffle_v4i64_v4i64__7_0_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, v6
-; GFX900-NEXT: v_mov_b32_e32 v9, v7
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: v_mov_b32_e32 v10, v0
-; GFX900-NEXT: v_mov_b32_e32 v11, v1
-; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v16, v14
+; GFX900-NEXT: v_mov_b32_e32 v17, v15
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
+; GFX900-NEXT: global_store_dwordx4 v0, v[14:17], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v0, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v4i64__7_0_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v18, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
+; GFX90A-NEXT: v_mov_b32_e32 v16, v14
+; GFX90A-NEXT: v_mov_b32_e32 v17, v15
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v2, v14
-; GFX90A-NEXT: v_mov_b32_e32 v3, v15
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v16, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v18, v[14:17], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v18, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_0_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v18, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v14
-; GFX942-NEXT: v_mov_b32_e32 v3, v15
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v16, v14
+; GFX942-NEXT: v_mov_b32_e32 v17, v15
+; GFX942-NEXT: global_store_dwordx4 v18, v[14:17], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v18, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -6785,57 +6593,52 @@ define void @v_shuffle_v4i64_v4i64__7_1_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v4i64__7_1_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v14, v12
+; GFX900-NEXT: v_mov_b32_e32 v15, v13
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: v_mov_b32_e32 v0, v14
-; GFX900-NEXT: v_mov_b32_e32 v1, v15
-; GFX900-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v4i64__7_1_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
+; GFX90A-NEXT: v_mov_b32_e32 v14, v12
+; GFX90A-NEXT: v_mov_b32_e32 v15, v13
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: v_mov_b32_e32 v0, v14
-; GFX90A-NEXT: v_mov_b32_e32 v1, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_1_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v14, v12
+; GFX942-NEXT: v_mov_b32_e32 v15, v13
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: v_mov_b32_e32 v0, v14
-; GFX942-NEXT: v_mov_b32_e32 v1, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -6849,66 +6652,52 @@ define void @v_shuffle_v4i64_v4i64__7_2_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v4i64__7_2_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: global_store_dwordx4 v14, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v12, v10
+; GFX900-NEXT: v_mov_b32_e32 v13, v11
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v12
-; GFX900-NEXT: v_mov_b32_e32 v3, v13
-; GFX900-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v4i64__7_2_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
+; GFX90A-NEXT: v_mov_b32_e32 v12, v10
+; GFX90A-NEXT: v_mov_b32_e32 v13, v11
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v14
-; GFX90A-NEXT: v_mov_b32_e32 v3, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_2_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v12, v10
+; GFX942-NEXT: v_mov_b32_e32 v13, v11
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v14
-; GFX942-NEXT: v_mov_b32_e32 v3, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -6922,63 +6711,59 @@ define void @v_shuffle_v4i64_v4i64__7_4_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v4i64__7_4_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: v_mov_b32_e32 v0, v14
-; GFX900-NEXT: v_mov_b32_e32 v1, v15
-; GFX900-NEXT: v_mov_b32_e32 v2, v8
-; GFX900-NEXT: v_mov_b32_e32 v3, v9
-; GFX900-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v0
+; GFX900-NEXT: v_mov_b32_e32 v9, v1
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v4i64__7_4_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v20, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[10:17]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: v_mov_b32_e32 v0, v14
-; GFX90A-NEXT: v_mov_b32_e32 v1, v15
-; GFX90A-NEXT: v_mov_b32_e32 v2, v8
-; GFX90A-NEXT: v_mov_b32_e32 v3, v9
-; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: v_mov_b32_e32 v18, v10
+; GFX90A-NEXT: v_mov_b32_e32 v19, v11
+; GFX90A-NEXT: global_store_dwordx4 v20, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v20, v[16:19], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_4_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v20, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[10:17]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: v_mov_b32_e32 v0, v14
-; GFX942-NEXT: v_mov_b32_e32 v1, v15
-; GFX942-NEXT: v_mov_b32_e32 v2, v8
-; GFX942-NEXT: v_mov_b32_e32 v3, v9
-; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: v_mov_b32_e32 v18, v10
+; GFX942-NEXT: v_mov_b32_e32 v19, v11
+; GFX942-NEXT: global_store_dwordx4 v20, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v20, v[16:19], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -6992,19 +6777,20 @@ define void @v_shuffle_v4i64_v4i64__7_5_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v4i64__7_5_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: v_mov_b32_e32 v8, v14
-; GFX900-NEXT: v_mov_b32_e32 v9, v15
-; GFX900-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v2
+; GFX900-NEXT: v_mov_b32_e32 v9, v3
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7012,18 +6798,18 @@ define void @v_shuffle_v4i64_v4i64__7_5_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v18, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: v_mov_b32_e32 v8, v14
-; GFX90A-NEXT: v_mov_b32_e32 v9, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: v_mov_b32_e32 v16, v10
+; GFX90A-NEXT: v_mov_b32_e32 v17, v11
+; GFX90A-NEXT: global_store_dwordx4 v18, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v18, v[14:17], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7031,18 +6817,18 @@ define void @v_shuffle_v4i64_v4i64__7_5_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v18, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: v_mov_b32_e32 v8, v14
-; GFX942-NEXT: v_mov_b32_e32 v9, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v16, v10
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: v_mov_b32_e32 v17, v11
+; GFX942-NEXT: global_store_dwordx4 v18, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v18, v[14:17], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -7056,19 +6842,20 @@ define void @v_shuffle_v4i64_v4i64__7_6_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v4i64__7_6_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: v_mov_b32_e32 v10, v14
-; GFX900-NEXT: v_mov_b32_e32 v11, v15
-; GFX900-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[10:13], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v4
+; GFX900-NEXT: v_mov_b32_e32 v9, v5
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7076,18 +6863,18 @@ define void @v_shuffle_v4i64_v4i64__7_6_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v18, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: v_mov_b32_e32 v10, v14
-; GFX90A-NEXT: v_mov_b32_e32 v11, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[10:13], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: v_mov_b32_e32 v16, v12
+; GFX90A-NEXT: v_mov_b32_e32 v17, v13
+; GFX90A-NEXT: global_store_dwordx4 v18, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v18, v[14:17], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7095,18 +6882,18 @@ define void @v_shuffle_v4i64_v4i64__7_6_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v18, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: v_mov_b32_e32 v10, v14
-; GFX942-NEXT: v_mov_b32_e32 v11, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v16, v12
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: v_mov_b32_e32 v17, v13
+; GFX942-NEXT: global_store_dwordx4 v18, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v18, v[14:17], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -7120,19 +6907,20 @@ define void @v_shuffle_v4i64_v4i64__7_7_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v4i64__7_7_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: v_mov_b32_e32 v12, v14
-; GFX900-NEXT: v_mov_b32_e32 v13, v15
-; GFX900-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7140,18 +6928,18 @@ define void @v_shuffle_v4i64_v4i64__7_7_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v18, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: v_mov_b32_e32 v12, v14
-; GFX90A-NEXT: v_mov_b32_e32 v13, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: v_mov_b32_e32 v16, v14
+; GFX90A-NEXT: v_mov_b32_e32 v17, v15
+; GFX90A-NEXT: global_store_dwordx4 v18, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v18, v[14:17], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7159,18 +6947,18 @@ define void @v_shuffle_v4i64_v4i64__7_7_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v18, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: v_mov_b32_e32 v12, v14
-; GFX942-NEXT: v_mov_b32_e32 v13, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v16, v14
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: v_mov_b32_e32 v17, v15
+; GFX942-NEXT: global_store_dwordx4 v18, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v18, v[14:17], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -7184,51 +6972,55 @@ define void @v_shuffle_v4i64_v4i64__7_7_u_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v4i64__7_7_u_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
-; GFX900-NEXT: v_mov_b32_e32 v12, v14
-; GFX900-NEXT: v_mov_b32_e32 v13, v15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v4i64__7_7_u_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v12, v14
-; GFX90A-NEXT: v_mov_b32_e32 v13, v15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_u_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v14
-; GFX942-NEXT: v_mov_b32_e32 v13, v15
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -7243,18 +7035,19 @@ define void @v_shuffle_v4i64_v4i64__7_7_0_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v16, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: v_mov_b32_e32 v12, v14
-; GFX900-NEXT: v_mov_b32_e32 v13, v15
-; GFX900-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v10, v14
+; GFX900-NEXT: v_mov_b32_e32 v11, v15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7262,18 +7055,19 @@ define void @v_shuffle_v4i64_v4i64__7_7_0_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v12, v14
-; GFX90A-NEXT: v_mov_b32_e32 v13, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v10, v14
+; GFX90A-NEXT: v_mov_b32_e32 v11, v15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7281,18 +7075,19 @@ define void @v_shuffle_v4i64_v4i64__7_7_0_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v12, v14
-; GFX942-NEXT: v_mov_b32_e32 v13, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v10, v14
+; GFX942-NEXT: v_mov_b32_e32 v11, v15
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -7307,18 +7102,19 @@ define void @v_shuffle_v4i64_v4i64__7_7_1_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, v12
+; GFX900-NEXT: v_mov_b32_e32 v11, v13
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: v_mov_b32_e32 v12, v14
-; GFX900-NEXT: v_mov_b32_e32 v13, v15
-; GFX900-NEXT: global_store_dwordx4 v16, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7326,18 +7122,19 @@ define void @v_shuffle_v4i64_v4i64__7_7_1_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, v12
+; GFX90A-NEXT: v_mov_b32_e32 v11, v13
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: v_mov_b32_e32 v12, v14
-; GFX90A-NEXT: v_mov_b32_e32 v13, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7345,18 +7142,19 @@ define void @v_shuffle_v4i64_v4i64__7_7_1_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, v12
+; GFX942-NEXT: v_mov_b32_e32 v11, v13
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: v_mov_b32_e32 v12, v14
-; GFX942-NEXT: v_mov_b32_e32 v13, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -7370,51 +7168,54 @@ define void @v_shuffle_v4i64_v4i64__7_7_2_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v4i64__7_7_2_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
-; GFX900-NEXT: v_mov_b32_e32 v12, v14
-; GFX900-NEXT: v_mov_b32_e32 v13, v15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v4i64__7_7_2_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v12, v14
-; GFX90A-NEXT: v_mov_b32_e32 v13, v15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_2_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v14
-; GFX942-NEXT: v_mov_b32_e32 v13, v15
+; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -7429,18 +7230,18 @@ define void @v_shuffle_v4i64_v4i64__7_7_4_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, v6
-; GFX900-NEXT: v_mov_b32_e32 v11, v7
-; GFX900-NEXT: v_mov_b32_e32 v12, v14
-; GFX900-NEXT: v_mov_b32_e32 v13, v15
-; GFX900-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v8
+; GFX900-NEXT: v_mov_b32_e32 v3, v9
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7448,18 +7249,18 @@ define void @v_shuffle_v4i64_v4i64__7_7_4_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, v6
-; GFX90A-NEXT: v_mov_b32_e32 v11, v7
-; GFX90A-NEXT: v_mov_b32_e32 v12, v14
-; GFX90A-NEXT: v_mov_b32_e32 v13, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v8
+; GFX90A-NEXT: v_mov_b32_e32 v3, v9
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7467,18 +7268,19 @@ define void @v_shuffle_v4i64_v4i64__7_7_4_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, v14
-; GFX942-NEXT: v_mov_b32_e32 v10, v6
-; GFX942-NEXT: v_mov_b32_e32 v11, v7
-; GFX942-NEXT: v_mov_b32_e32 v13, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v8
+; GFX942-NEXT: v_mov_b32_e32 v3, v9
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -7493,19 +7295,18 @@ define void @v_shuffle_v4i64_v4i64__7_7_5_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, v6
-; GFX900-NEXT: v_mov_b32_e32 v13, v7
-; GFX900-NEXT: global_store_dwordx4 v16, v[10:13], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v12, v14
-; GFX900-NEXT: v_mov_b32_e32 v13, v15
-; GFX900-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v8
+; GFX900-NEXT: v_mov_b32_e32 v5, v9
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7513,19 +7314,18 @@ define void @v_shuffle_v4i64_v4i64__7_7_5_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, v6
-; GFX90A-NEXT: v_mov_b32_e32 v13, v7
-; GFX90A-NEXT: global_store_dwordx4 v16, v[10:13], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v12, v14
-; GFX90A-NEXT: v_mov_b32_e32 v13, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v8
+; GFX90A-NEXT: v_mov_b32_e32 v5, v9
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7533,20 +7333,19 @@ define void @v_shuffle_v4i64_v4i64__7_7_5_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v6
-; GFX942-NEXT: v_mov_b32_e32 v13, v7
-; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v12, v14
-; GFX942-NEXT: v_mov_b32_e32 v13, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v8
+; GFX942-NEXT: v_mov_b32_e32 v5, v9
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -7561,18 +7360,19 @@ define void @v_shuffle_v4i64_v4i64__7_7_6_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v12
-; GFX900-NEXT: v_mov_b32_e32 v5, v13
-; GFX900-NEXT: v_mov_b32_e32 v12, v14
-; GFX900-NEXT: v_mov_b32_e32 v13, v15
-; GFX900-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v4
+; GFX900-NEXT: v_mov_b32_e32 v9, v5
+; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7580,18 +7380,19 @@ define void @v_shuffle_v4i64_v4i64__7_7_6_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v12
-; GFX90A-NEXT: v_mov_b32_e32 v5, v13
-; GFX90A-NEXT: v_mov_b32_e32 v12, v14
-; GFX90A-NEXT: v_mov_b32_e32 v13, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v4
+; GFX90A-NEXT: v_mov_b32_e32 v9, v5
+; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7599,18 +7400,20 @@ define void @v_shuffle_v4i64_v4i64__7_7_6_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v12
-; GFX942-NEXT: v_mov_b32_e32 v5, v13
-; GFX942-NEXT: v_mov_b32_e32 v12, v14
-; GFX942-NEXT: v_mov_b32_e32 v13, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v4
+; GFX942-NEXT: v_mov_b32_e32 v9, v5
+; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -7755,39 +7558,33 @@ define void @v_shuffle_v4i64_v4i64__3_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v4i64__3_4_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v4i64__3_4_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v4i64__3_4_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -7812,15 +7609,15 @@ define void @v_shuffle_v4i64_v4i64__5_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: v_mov_b32_e32 v6, v0
-; GFX900-NEXT: v_mov_b32_e32 v7, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
+; GFX900-NEXT: v_mov_b32_e32 v0, v2
+; GFX900-NEXT: v_mov_b32_e32 v1, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: v_mov_b32_e32 v7, v3
+; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7828,15 +7625,15 @@ define void @v_shuffle_v4i64_v4i64__5_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: v_mov_b32_e32 v7, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v2
+; GFX90A-NEXT: v_mov_b32_e32 v1, v3
+; GFX90A-NEXT: v_mov_b32_e32 v6, v2
+; GFX90A-NEXT: v_mov_b32_e32 v7, v3
+; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7844,15 +7641,15 @@ define void @v_shuffle_v4i64_v4i64__5_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v2
+; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: v_mov_b32_e32 v6, v2
+; GFX942-NEXT: v_mov_b32_e32 v7, v3
+; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -7924,15 +7721,13 @@ define void @v_shuffle_v4i64_v4i64__7_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v0
; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v0
+; GFX900-NEXT: v_mov_b32_e32 v9, v1
+; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7942,15 +7737,13 @@ define void @v_shuffle_v4i64_v4i64__7_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v0
; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v0
+; GFX90A-NEXT: v_mov_b32_e32 v9, v1
+; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7960,15 +7753,13 @@ define void @v_shuffle_v4i64_v4i64__7_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v0
; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v0
+; GFX942-NEXT: v_mov_b32_e32 v9, v1
+; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -7989,10 +7780,7 @@ define void @v_shuffle_v4i64_v4i64__7_u_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: v_mov_b32_e32 v2, v0
; GFX900-NEXT: v_mov_b32_e32 v3, v1
; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -8006,10 +7794,7 @@ define void @v_shuffle_v4i64_v4i64__7_u_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: v_mov_b32_e32 v2, v0
; GFX90A-NEXT: v_mov_b32_e32 v3, v1
; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -8023,10 +7808,7 @@ define void @v_shuffle_v4i64_v4i64__7_u_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: v_mov_b32_e32 v2, v0
; GFX942-NEXT: v_mov_b32_e32 v3, v1
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -8043,18 +7825,14 @@ define void @v_shuffle_v4i64_v4i64__7_0_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v0
; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v8, v6
-; GFX900-NEXT: v_mov_b32_e32 v9, v7
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, v0
-; GFX900-NEXT: v_mov_b32_e32 v11, v1
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -8064,19 +7842,14 @@ define void @v_shuffle_v4i64_v4i64__7_0_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v1
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:9]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v3
-; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v8
-; GFX90A-NEXT: v_mov_b32_e32 v3, v9
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -8086,20 +7859,14 @@ define void @v_shuffle_v4i64_v4i64__7_0_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v0
+; GFX942-NEXT: v_mov_b32_e32 v3, v1
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:9]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v3
-; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v8
-; GFX942-NEXT: v_mov_b32_e32 v3, v9
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -8114,18 +7881,16 @@ define void @v_shuffle_v4i64_v4i64__7_1_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v4
-; GFX900-NEXT: v_mov_b32_e32 v7, v5
-; GFX900-NEXT: v_mov_b32_e32 v0, v10
-; GFX900-NEXT: v_mov_b32_e32 v1, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: global_store_dwordx4 v14, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -8133,18 +7898,16 @@ define void @v_shuffle_v4i64_v4i64__7_1_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v5
-; GFX90A-NEXT: v_mov_b32_e32 v0, v10
-; GFX90A-NEXT: v_mov_b32_e32 v1, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: global_store_dwordx4 v14, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -8152,19 +7915,17 @@ define void @v_shuffle_v4i64_v4i64__7_1_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v5
-; GFX942-NEXT: v_mov_b32_e32 v0, v10
-; GFX942-NEXT: v_mov_b32_e32 v1, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v0
+; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: global_store_dwordx4 v14, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -8179,18 +7940,16 @@ define void @v_shuffle_v4i64_v4i64__7_2_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
-; GFX900-NEXT: v_mov_b32_e32 v8, v6
-; GFX900-NEXT: v_mov_b32_e32 v9, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v12
-; GFX900-NEXT: v_mov_b32_e32 v3, v13
-; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -8198,18 +7957,16 @@ define void @v_shuffle_v4i64_v4i64__7_2_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v6
-; GFX90A-NEXT: v_mov_b32_e32 v9, v7
-; GFX90A-NEXT: v_mov_b32_e32 v2, v12
-; GFX90A-NEXT: v_mov_b32_e32 v3, v13
-; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -8217,19 +7974,17 @@ define void @v_shuffle_v4i64_v4i64__7_2_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v6
-; GFX942-NEXT: v_mov_b32_e32 v9, v7
-; GFX942-NEXT: v_mov_b32_e32 v2, v12
-; GFX942-NEXT: v_mov_b32_e32 v3, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v0
+; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -8244,18 +7999,16 @@ define void @v_shuffle_v4i64_v4i64__7_3_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
-; GFX900-NEXT: v_mov_b32_e32 v10, v8
-; GFX900-NEXT: v_mov_b32_e32 v11, v9
-; GFX900-NEXT: v_mov_b32_e32 v4, v14
-; GFX900-NEXT: v_mov_b32_e32 v5, v15
-; GFX900-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -8263,18 +8016,16 @@ define void @v_shuffle_v4i64_v4i64__7_3_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v10, v8
-; GFX90A-NEXT: v_mov_b32_e32 v11, v9
-; GFX90A-NEXT: v_mov_b32_e32 v4, v14
-; GFX90A-NEXT: v_mov_b32_e32 v5, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -8282,18 +8033,17 @@ define void @v_shuffle_v4i64_v4i64__7_3_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v8
-; GFX942-NEXT: v_mov_b32_e32 v11, v9
-; GFX942-NEXT: v_mov_b32_e32 v4, v14
-; GFX942-NEXT: v_mov_b32_e32 v5, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v0
+; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -8307,54 +8057,49 @@ define void @v_shuffle_v4i64_v4i64__7_5_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v4i64__7_5_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v8, v0
-; GFX900-NEXT: v_mov_b32_e32 v9, v1
-; GFX900-NEXT: v_mov_b32_e32 v10, v0
-; GFX900-NEXT: v_mov_b32_e32 v11, v1
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, v2
+; GFX900-NEXT: v_mov_b32_e32 v1, v3
+; GFX900-NEXT: v_mov_b32_e32 v10, v4
+; GFX900-NEXT: v_mov_b32_e32 v11, v5
+; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v4i64__7_5_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v10, v0
-; GFX90A-NEXT: v_mov_b32_e32 v11, v1
-; GFX90A-NEXT: v_mov_b32_e32 v12, v0
-; GFX90A-NEXT: v_mov_b32_e32 v13, v1
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[10:13], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v2
+; GFX90A-NEXT: v_mov_b32_e32 v1, v3
+; GFX90A-NEXT: v_mov_b32_e32 v10, v4
+; GFX90A-NEXT: v_mov_b32_e32 v11, v5
+; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_5_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v0
-; GFX942-NEXT: v_mov_b32_e32 v11, v1
-; GFX942-NEXT: v_mov_b32_e32 v12, v0
-; GFX942-NEXT: v_mov_b32_e32 v13, v1
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[10:13], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v2
+; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: v_mov_b32_e32 v10, v4
+; GFX942-NEXT: v_mov_b32_e32 v11, v5
+; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -8371,14 +8116,13 @@ define void @v_shuffle_v4i64_v4i64__7_6_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v0
; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v4
+; GFX900-NEXT: v_mov_b32_e32 v9, v5
+; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -8388,14 +8132,13 @@ define void @v_shuffle_v4i64_v4i64__7_6_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v0
; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v4
+; GFX90A-NEXT: v_mov_b32_e32 v9, v5
+; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -8405,14 +8148,13 @@ define void @v_shuffle_v4i64_v4i64__7_6_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v0
; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v4
+; GFX942-NEXT: v_mov_b32_e32 v9, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -8429,13 +8171,13 @@ define void @v_shuffle_v4i64_v4i64__7_7_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v0
; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -8445,13 +8187,13 @@ define void @v_shuffle_v4i64_v4i64__7_7_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v0
; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -8461,13 +8203,13 @@ define void @v_shuffle_v4i64_v4i64__7_7_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v0
; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -8481,48 +8223,42 @@ define void @v_shuffle_v4i64_v4i64__7_7_u_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v4i64__7_7_u_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v10, v8
+; GFX900-NEXT: v_mov_b32_e32 v11, v9
+; GFX900-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v0, v[8:11], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v4i64__7_7_u_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v10, v8
+; GFX90A-NEXT: v_mov_b32_e32 v11, v9
+; GFX90A-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v0, v[8:11], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_u_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v0, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v10, v8
+; GFX942-NEXT: v_mov_b32_e32 v11, v9
+; GFX942-NEXT: global_store_dwordx4 v0, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -8539,14 +8275,14 @@ define void @v_shuffle_v4i64_v4i64__7_7_0_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v8
-; GFX900-NEXT: v_mov_b32_e32 v7, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v10, v8
+; GFX900-NEXT: v_mov_b32_e32 v11, v9
+; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -8556,14 +8292,14 @@ define void @v_shuffle_v4i64_v4i64__7_7_0_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v8
-; GFX90A-NEXT: v_mov_b32_e32 v7, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v10, v8
+; GFX90A-NEXT: v_mov_b32_e32 v11, v9
+; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -8573,14 +8309,14 @@ define void @v_shuffle_v4i64_v4i64__7_7_0_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v6, v8
-; GFX942-NEXT: v_mov_b32_e32 v7, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v10, v8
+; GFX942-NEXT: v_mov_b32_e32 v11, v9
+; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -8597,14 +8333,14 @@ define void @v_shuffle_v4i64_v4i64__7_7_1_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v8, v10
-; GFX900-NEXT: v_mov_b32_e32 v9, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v12, v10
+; GFX900-NEXT: v_mov_b32_e32 v13, v11
+; GFX900-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -8614,14 +8350,14 @@ define void @v_shuffle_v4i64_v4i64__7_7_1_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v10
-; GFX90A-NEXT: v_mov_b32_e32 v9, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v12, v10
+; GFX90A-NEXT: v_mov_b32_e32 v13, v11
+; GFX90A-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -8631,14 +8367,14 @@ define void @v_shuffle_v4i64_v4i64__7_7_1_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v8, v10
-; GFX942-NEXT: v_mov_b32_e32 v9, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v12, v10
+; GFX942-NEXT: v_mov_b32_e32 v13, v11
+; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -8655,14 +8391,14 @@ define void @v_shuffle_v4i64_v4i64__7_7_2_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
-; GFX900-NEXT: v_mov_b32_e32 v10, v12
-; GFX900-NEXT: v_mov_b32_e32 v11, v13
-; GFX900-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v14, v12
+; GFX900-NEXT: v_mov_b32_e32 v15, v13
+; GFX900-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -8672,14 +8408,14 @@ define void @v_shuffle_v4i64_v4i64__7_7_2_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
-; GFX90A-NEXT: v_mov_b32_e32 v10, v12
-; GFX90A-NEXT: v_mov_b32_e32 v11, v13
-; GFX90A-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v14, v12
+; GFX90A-NEXT: v_mov_b32_e32 v15, v13
+; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -8689,14 +8425,14 @@ define void @v_shuffle_v4i64_v4i64__7_7_2_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v10, v12
-; GFX942-NEXT: v_mov_b32_e32 v11, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v14, v12
+; GFX942-NEXT: v_mov_b32_e32 v15, v13
+; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -8713,60 +8449,48 @@ define void @v_shuffle_v4i64_v4i64__7_7_3_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, v6
-; GFX900-NEXT: v_mov_b32_e32 v9, v7
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v10, v0
-; GFX900-NEXT: v_mov_b32_e32 v11, v1
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
+; GFX900-NEXT: v_mov_b32_e32 v16, v14
+; GFX900-NEXT: v_mov_b32_e32 v17, v15
+; GFX900-NEXT: global_store_dwordx4 v0, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v0, v[14:17], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v4i64__7_7_3_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v18, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v16, v14
+; GFX90A-NEXT: v_mov_b32_e32 v17, v15
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: v_mov_b32_e32 v2, v8
-; GFX90A-NEXT: v_mov_b32_e32 v3, v9
-; GFX90A-NEXT: v_mov_b32_e32 v12, v14
-; GFX90A-NEXT: v_mov_b32_e32 v13, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v18, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v18, v[14:17], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_3_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v18, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: v_mov_b32_e32 v2, v8
-; GFX942-NEXT: v_mov_b32_e32 v3, v9
-; GFX942-NEXT: v_mov_b32_e32 v12, v14
-; GFX942-NEXT: v_mov_b32_e32 v13, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v18, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v16, v14
+; GFX942-NEXT: v_mov_b32_e32 v17, v15
+; GFX942-NEXT: global_store_dwordx4 v18, v[14:17], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -8783,14 +8507,13 @@ define void @v_shuffle_v4i64_v4i64__7_7_5_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: v_mov_b32_e32 v4, v0
; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -8800,14 +8523,13 @@ define void @v_shuffle_v4i64_v4i64__7_7_5_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: v_mov_b32_e32 v4, v0
; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -8817,14 +8539,13 @@ define void @v_shuffle_v4i64_v4i64__7_7_5_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: v_mov_b32_e32 v4, v0
; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -8838,57 +8559,49 @@ define void @v_shuffle_v4i64_v4i64__7_7_6_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v4i64__7_7_6_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, v6
+; GFX900-NEXT: v_mov_b32_e32 v1, v7
+; GFX900-NEXT: v_mov_b32_e32 v10, v8
+; GFX900-NEXT: v_mov_b32_e32 v11, v9
+; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v4i64__7_7_6_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v6
+; GFX90A-NEXT: v_mov_b32_e32 v1, v7
+; GFX90A-NEXT: v_mov_b32_e32 v10, v8
+; GFX90A-NEXT: v_mov_b32_e32 v11, v9
+; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_6_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v6
+; GFX942-NEXT: v_mov_b32_e32 v1, v7
+; GFX942-NEXT: v_mov_b32_e32 v10, v8
+; GFX942-NEXT: v_mov_b32_e32 v11, v9
+; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -9150,15 +8863,15 @@ define void @v_shuffle_v4i64_v4i64__3_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v12, v10
+; GFX900-NEXT: v_mov_b32_e32 v13, v11
+; GFX900-NEXT: v_mov_b32_e32 v8, v10
+; GFX900-NEXT: v_mov_b32_e32 v9, v11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, v10
-; GFX900-NEXT: v_mov_b32_e32 v13, v11
-; GFX900-NEXT: v_mov_b32_e32 v8, v6
-; GFX900-NEXT: v_mov_b32_e32 v9, v7
; GFX900-NEXT: global_store_dwordx4 v16, v[10:13], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -9169,15 +8882,15 @@ define void @v_shuffle_v4i64_v4i64__3_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v16, 0
+; GFX90A-NEXT: v_mov_b32_e32 v12, v10
+; GFX90A-NEXT: v_mov_b32_e32 v13, v11
+; GFX90A-NEXT: v_mov_b32_e32 v8, v10
+; GFX90A-NEXT: v_mov_b32_e32 v9, v11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, v10
-; GFX90A-NEXT: v_mov_b32_e32 v13, v11
-; GFX90A-NEXT: v_mov_b32_e32 v8, v6
-; GFX90A-NEXT: v_mov_b32_e32 v9, v7
; GFX90A-NEXT: global_store_dwordx4 v16, v[10:13], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -9188,15 +8901,15 @@ define void @v_shuffle_v4i64_v4i64__3_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, v10
+; GFX942-NEXT: v_mov_b32_e32 v13, v11
+; GFX942-NEXT: v_mov_b32_e32 v8, v10
+; GFX942-NEXT: v_mov_b32_e32 v9, v11
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, v10
-; GFX942-NEXT: v_mov_b32_e32 v13, v11
-; GFX942-NEXT: v_mov_b32_e32 v8, v6
-; GFX942-NEXT: v_mov_b32_e32 v9, v7
; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -9366,13 +9079,13 @@ define void @v_shuffle_v4i64_v4i64__7_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: v_mov_b32_e32 v4, v2
; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v2
+; GFX900-NEXT: v_mov_b32_e32 v9, v3
+; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -9382,13 +9095,13 @@ define void @v_shuffle_v4i64_v4i64__7_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: v_mov_b32_e32 v4, v2
; GFX90A-NEXT: v_mov_b32_e32 v5, v3
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v2
+; GFX90A-NEXT: v_mov_b32_e32 v9, v3
+; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -9398,13 +9111,13 @@ define void @v_shuffle_v4i64_v4i64__7_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: v_mov_b32_e32 v4, v2
; GFX942-NEXT: v_mov_b32_e32 v5, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v2
+; GFX942-NEXT: v_mov_b32_e32 v9, v3
+; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -9424,10 +9137,8 @@ define void @v_shuffle_v4i64_v4i64__7_u_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: v_mov_b32_e32 v4, v2
; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -9440,10 +9151,8 @@ define void @v_shuffle_v4i64_v4i64__7_u_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v4, v2
; GFX90A-NEXT: v_mov_b32_e32 v5, v3
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -9456,10 +9165,8 @@ define void @v_shuffle_v4i64_v4i64__7_u_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v4, v2
; GFX942-NEXT: v_mov_b32_e32 v5, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -9476,18 +9183,14 @@ define void @v_shuffle_v4i64_v4i64__7_0_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: v_mov_b32_e32 v4, v2
; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v8, v6
-; GFX900-NEXT: v_mov_b32_e32 v9, v7
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, v0
-; GFX900-NEXT: v_mov_b32_e32 v11, v1
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v16, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -9497,18 +9200,14 @@ define void @v_shuffle_v4i64_v4i64__7_0_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:9]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v5
-; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v2, v8
-; GFX90A-NEXT: v_mov_b32_e32 v3, v9
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v16, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -9518,19 +9217,14 @@ define void @v_shuffle_v4i64_v4i64__7_0_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:9]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v5
-; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v8
-; GFX942-NEXT: v_mov_b32_e32 v3, v9
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -9545,18 +9239,16 @@ define void @v_shuffle_v4i64_v4i64__7_1_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v8, v6
-; GFX900-NEXT: v_mov_b32_e32 v9, v7
-; GFX900-NEXT: v_mov_b32_e32 v0, v10
-; GFX900-NEXT: v_mov_b32_e32 v1, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -9564,18 +9256,16 @@ define void @v_shuffle_v4i64_v4i64__7_1_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v6
-; GFX90A-NEXT: v_mov_b32_e32 v9, v7
-; GFX90A-NEXT: v_mov_b32_e32 v0, v10
-; GFX90A-NEXT: v_mov_b32_e32 v1, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -9583,19 +9273,17 @@ define void @v_shuffle_v4i64_v4i64__7_1_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v6
-; GFX942-NEXT: v_mov_b32_e32 v9, v7
-; GFX942-NEXT: v_mov_b32_e32 v0, v10
-; GFX942-NEXT: v_mov_b32_e32 v1, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -9610,18 +9298,16 @@ define void @v_shuffle_v4i64_v4i64__7_2_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
-; GFX900-NEXT: v_mov_b32_e32 v10, v8
-; GFX900-NEXT: v_mov_b32_e32 v11, v9
-; GFX900-NEXT: v_mov_b32_e32 v2, v12
-; GFX900-NEXT: v_mov_b32_e32 v3, v13
-; GFX900-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -9629,18 +9315,16 @@ define void @v_shuffle_v4i64_v4i64__7_2_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
-; GFX90A-NEXT: v_mov_b32_e32 v10, v8
-; GFX90A-NEXT: v_mov_b32_e32 v11, v9
-; GFX90A-NEXT: v_mov_b32_e32 v2, v12
-; GFX90A-NEXT: v_mov_b32_e32 v3, v13
-; GFX90A-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -9648,19 +9332,17 @@ define void @v_shuffle_v4i64_v4i64__7_2_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v8
-; GFX942-NEXT: v_mov_b32_e32 v11, v9
-; GFX942-NEXT: v_mov_b32_e32 v2, v12
-; GFX942-NEXT: v_mov_b32_e32 v3, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -9675,18 +9357,16 @@ define void @v_shuffle_v4i64_v4i64__7_3_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
-; GFX900-NEXT: v_mov_b32_e32 v12, v10
-; GFX900-NEXT: v_mov_b32_e32 v13, v11
-; GFX900-NEXT: v_mov_b32_e32 v4, v14
-; GFX900-NEXT: v_mov_b32_e32 v5, v15
-; GFX900-NEXT: global_store_dwordx4 v16, v[10:13], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -9694,18 +9374,16 @@ define void @v_shuffle_v4i64_v4i64__7_3_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v12, v10
-; GFX90A-NEXT: v_mov_b32_e32 v13, v11
-; GFX90A-NEXT: v_mov_b32_e32 v4, v14
-; GFX90A-NEXT: v_mov_b32_e32 v5, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[10:13], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -9713,20 +9391,19 @@ define void @v_shuffle_v4i64_v4i64__7_3_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v10
-; GFX942-NEXT: v_mov_b32_e32 v13, v11
-; GFX942-NEXT: v_mov_b32_e32 v4, v14
-; GFX942-NEXT: v_mov_b32_e32 v5, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
-; GFX942-NEXT: s_waitcnt vmcnt(0)
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
+; GFX942-NEXT: s_waitcnt vmcnt(0)
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
%vec1 = call <4 x i64> asm "; def $0", "=v"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 3, i32 5, i32 5>
@@ -9741,16 +9418,13 @@ define void @v_shuffle_v4i64_v4i64__7_4_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: v_mov_b32_e32 v4, v2
; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v0
+; GFX900-NEXT: v_mov_b32_e32 v9, v1
+; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -9760,16 +9434,13 @@ define void @v_shuffle_v4i64_v4i64__7_4_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: v_mov_b32_e32 v4, v2
; GFX90A-NEXT: v_mov_b32_e32 v5, v3
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v0
+; GFX90A-NEXT: v_mov_b32_e32 v9, v1
+; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -9779,16 +9450,13 @@ define void @v_shuffle_v4i64_v4i64__7_4_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: v_mov_b32_e32 v4, v2
; GFX942-NEXT: v_mov_b32_e32 v5, v3
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v0
+; GFX942-NEXT: v_mov_b32_e32 v9, v1
+; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -9805,14 +9473,13 @@ define void @v_shuffle_v4i64_v4i64__7_6_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v2
; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v4
+; GFX900-NEXT: v_mov_b32_e32 v9, v5
+; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -9822,14 +9489,13 @@ define void @v_shuffle_v4i64_v4i64__7_6_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v2
; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v4
+; GFX90A-NEXT: v_mov_b32_e32 v9, v5
+; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -9839,14 +9505,13 @@ define void @v_shuffle_v4i64_v4i64__7_6_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v2
; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v4
+; GFX942-NEXT: v_mov_b32_e32 v9, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -9863,14 +9528,13 @@ define void @v_shuffle_v4i64_v4i64__7_7_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: v_mov_b32_e32 v4, v2
; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -9880,14 +9544,13 @@ define void @v_shuffle_v4i64_v4i64__7_7_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: v_mov_b32_e32 v4, v2
; GFX90A-NEXT: v_mov_b32_e32 v5, v3
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -9897,14 +9560,13 @@ define void @v_shuffle_v4i64_v4i64__7_7_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: v_mov_b32_e32 v4, v2
; GFX942-NEXT: v_mov_b32_e32 v5, v3
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -9918,42 +9580,42 @@ define void @v_shuffle_v4i64_v4i64__7_7_u_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v4i64__7_7_u_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v4i64__7_7_u_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_u_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -9968,18 +9630,19 @@ define void @v_shuffle_v4i64_v4i64__7_7_0_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:9]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v6, v8
-; GFX900-NEXT: v_mov_b32_e32 v7, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v10, v2
+; GFX900-NEXT: v_mov_b32_e32 v11, v3
+; GFX900-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -9987,18 +9650,19 @@ define void @v_shuffle_v4i64_v4i64__7_7_0_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:9]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v6, v8
-; GFX90A-NEXT: v_mov_b32_e32 v7, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v10, v2
+; GFX90A-NEXT: v_mov_b32_e32 v11, v3
+; GFX90A-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -10006,19 +9670,20 @@ define void @v_shuffle_v4i64_v4i64__7_7_0_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:9]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v6, v8
-; GFX942-NEXT: v_mov_b32_e32 v7, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v10, v2
+; GFX942-NEXT: v_mov_b32_e32 v11, v3
+; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -10033,18 +9698,19 @@ define void @v_shuffle_v4i64_v4i64__7_7_1_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: v_mov_b32_e32 v8, v10
-; GFX900-NEXT: v_mov_b32_e32 v9, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v10, v2
+; GFX900-NEXT: v_mov_b32_e32 v11, v3
+; GFX900-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -10052,18 +9718,19 @@ define void @v_shuffle_v4i64_v4i64__7_7_1_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: v_mov_b32_e32 v8, v10
-; GFX90A-NEXT: v_mov_b32_e32 v9, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v10, v2
+; GFX90A-NEXT: v_mov_b32_e32 v11, v3
+; GFX90A-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -10071,19 +9738,20 @@ define void @v_shuffle_v4i64_v4i64__7_7_1_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: v_mov_b32_e32 v8, v10
-; GFX942-NEXT: v_mov_b32_e32 v9, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v10, v2
+; GFX942-NEXT: v_mov_b32_e32 v11, v3
+; GFX942-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -10098,18 +9766,19 @@ define void @v_shuffle_v4i64_v4i64__7_7_2_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v8
-; GFX900-NEXT: v_mov_b32_e32 v7, v9
-; GFX900-NEXT: v_mov_b32_e32 v10, v12
-; GFX900-NEXT: v_mov_b32_e32 v11, v13
-; GFX900-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v10, v2
+; GFX900-NEXT: v_mov_b32_e32 v11, v3
+; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -10117,18 +9786,19 @@ define void @v_shuffle_v4i64_v4i64__7_7_2_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v8
-; GFX90A-NEXT: v_mov_b32_e32 v7, v9
-; GFX90A-NEXT: v_mov_b32_e32 v10, v12
-; GFX90A-NEXT: v_mov_b32_e32 v11, v13
-; GFX90A-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v10, v2
+; GFX90A-NEXT: v_mov_b32_e32 v11, v3
+; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -10136,19 +9806,20 @@ define void @v_shuffle_v4i64_v4i64__7_7_2_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v8
-; GFX942-NEXT: v_mov_b32_e32 v7, v9
-; GFX942-NEXT: v_mov_b32_e32 v10, v12
-; GFX942-NEXT: v_mov_b32_e32 v11, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v10, v2
+; GFX942-NEXT: v_mov_b32_e32 v11, v3
+; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -10163,18 +9834,18 @@ define void @v_shuffle_v4i64_v4i64__7_7_3_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, v6
-; GFX900-NEXT: v_mov_b32_e32 v9, v7
-; GFX900-NEXT: v_mov_b32_e32 v12, v14
-; GFX900-NEXT: v_mov_b32_e32 v13, v15
-; GFX900-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v10
+; GFX900-NEXT: v_mov_b32_e32 v9, v11
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
+; GFX900-NEXT: v_mov_b32_e32 v16, v14
+; GFX900-NEXT: v_mov_b32_e32 v17, v15
+; GFX900-NEXT: global_store_dwordx4 v0, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v0, v[14:17], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -10184,16 +9855,16 @@ define void @v_shuffle_v4i64_v4i64__7_7_3_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
+; GFX90A-NEXT: v_mov_b32_e32 v18, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v10
+; GFX90A-NEXT: v_mov_b32_e32 v9, v11
+; GFX90A-NEXT: v_mov_b32_e32 v16, v14
+; GFX90A-NEXT: v_mov_b32_e32 v17, v15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, v6
-; GFX90A-NEXT: v_mov_b32_e32 v9, v7
-; GFX90A-NEXT: v_mov_b32_e32 v12, v14
-; GFX90A-NEXT: v_mov_b32_e32 v13, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v18, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v18, v[14:17], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -10203,16 +9874,16 @@ define void @v_shuffle_v4i64_v4i64__7_7_3_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: v_mov_b32_e32 v18, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v10
+; GFX942-NEXT: v_mov_b32_e32 v9, v11
+; GFX942-NEXT: v_mov_b32_e32 v16, v14
+; GFX942-NEXT: v_mov_b32_e32 v17, v15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, v14
-; GFX942-NEXT: v_mov_b32_e32 v8, v6
-; GFX942-NEXT: v_mov_b32_e32 v9, v7
-; GFX942-NEXT: v_mov_b32_e32 v13, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v18, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v18, v[14:17], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -10226,42 +9897,42 @@ define void @v_shuffle_v4i64_v4i64__7_7_4_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v4i64__7_7_4_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v4i64__7_7_4_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_4_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -10278,13 +9949,13 @@ define void @v_shuffle_v4i64_v4i64__7_7_6_5(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v4
; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -10294,13 +9965,13 @@ define void @v_shuffle_v4i64_v4i64__7_7_6_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v4
; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -10310,13 +9981,13 @@ define void @v_shuffle_v4i64_v4i64__7_7_6_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v4
; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -10578,15 +10249,15 @@ define void @v_shuffle_v4i64_v4i64__3_6_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v14, v12
+; GFX900-NEXT: v_mov_b32_e32 v15, v13
+; GFX900-NEXT: v_mov_b32_e32 v8, v12
+; GFX900-NEXT: v_mov_b32_e32 v9, v13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, v12
-; GFX900-NEXT: v_mov_b32_e32 v15, v13
-; GFX900-NEXT: v_mov_b32_e32 v10, v6
-; GFX900-NEXT: v_mov_b32_e32 v11, v7
; GFX900-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[10:13], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -10597,15 +10268,15 @@ define void @v_shuffle_v4i64_v4i64__3_6_6_6(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v16, 0
+; GFX90A-NEXT: v_mov_b32_e32 v14, v12
+; GFX90A-NEXT: v_mov_b32_e32 v15, v13
+; GFX90A-NEXT: v_mov_b32_e32 v8, v12
+; GFX90A-NEXT: v_mov_b32_e32 v9, v13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, v12
-; GFX90A-NEXT: v_mov_b32_e32 v15, v13
-; GFX90A-NEXT: v_mov_b32_e32 v10, v6
-; GFX90A-NEXT: v_mov_b32_e32 v11, v7
; GFX90A-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[10:13], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -10616,15 +10287,15 @@ define void @v_shuffle_v4i64_v4i64__3_6_6_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: v_mov_b32_e32 v14, v12
+; GFX942-NEXT: v_mov_b32_e32 v15, v13
+; GFX942-NEXT: v_mov_b32_e32 v8, v12
+; GFX942-NEXT: v_mov_b32_e32 v9, v13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, v12
-; GFX942-NEXT: v_mov_b32_e32 v15, v13
-; GFX942-NEXT: v_mov_b32_e32 v10, v6
-; GFX942-NEXT: v_mov_b32_e32 v11, v7
; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -10794,14 +10465,13 @@ define void @v_shuffle_v4i64_v4i64__7_6_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v4
; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v4
+; GFX900-NEXT: v_mov_b32_e32 v9, v5
+; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -10811,14 +10481,13 @@ define void @v_shuffle_v4i64_v4i64__7_6_6_6(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v4
; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v4
+; GFX90A-NEXT: v_mov_b32_e32 v9, v5
+; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -10828,14 +10497,13 @@ define void @v_shuffle_v4i64_v4i64__7_6_6_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v4
; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v4
+; GFX942-NEXT: v_mov_b32_e32 v9, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -10855,10 +10523,8 @@ define void @v_shuffle_v4i64_v4i64__7_u_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v4
; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -10871,10 +10537,8 @@ define void @v_shuffle_v4i64_v4i64__7_u_6_6(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v4
; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -10887,10 +10551,8 @@ define void @v_shuffle_v4i64_v4i64__7_u_6_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v4
; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -10907,18 +10569,14 @@ define void @v_shuffle_v4i64_v4i64__7_0_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v4
; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v8, v6
-; GFX900-NEXT: v_mov_b32_e32 v9, v7
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, v0
-; GFX900-NEXT: v_mov_b32_e32 v11, v1
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v16, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -10928,18 +10586,14 @@ define void @v_shuffle_v4i64_v4i64__7_0_6_6(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v4
+; GFX90A-NEXT: v_mov_b32_e32 v3, v5
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:9]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v2, v8
-; GFX90A-NEXT: v_mov_b32_e32 v3, v9
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v16, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -10949,19 +10603,14 @@ define void @v_shuffle_v4i64_v4i64__7_0_6_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v4
+; GFX942-NEXT: v_mov_b32_e32 v3, v5
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:9]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v8
-; GFX942-NEXT: v_mov_b32_e32 v3, v9
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -10976,18 +10625,16 @@ define void @v_shuffle_v4i64_v4i64__7_1_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v8
-; GFX900-NEXT: v_mov_b32_e32 v7, v9
-; GFX900-NEXT: v_mov_b32_e32 v0, v10
-; GFX900-NEXT: v_mov_b32_e32 v1, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v4
+; GFX900-NEXT: v_mov_b32_e32 v3, v5
+; GFX900-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -10995,18 +10642,16 @@ define void @v_shuffle_v4i64_v4i64__7_1_6_6(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v8
-; GFX90A-NEXT: v_mov_b32_e32 v7, v9
-; GFX90A-NEXT: v_mov_b32_e32 v0, v10
-; GFX90A-NEXT: v_mov_b32_e32 v1, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v4
+; GFX90A-NEXT: v_mov_b32_e32 v3, v5
+; GFX90A-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -11014,19 +10659,17 @@ define void @v_shuffle_v4i64_v4i64__7_1_6_6(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v8
-; GFX942-NEXT: v_mov_b32_e32 v7, v9
-; GFX942-NEXT: v_mov_b32_e32 v0, v10
-; GFX942-NEXT: v_mov_b32_e32 v1, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v4
+; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -11041,18 +10684,16 @@ define void @v_shuffle_v4i64_v4i64__7_2_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
-; GFX900-NEXT: v_mov_b32_e32 v8, v10
-; GFX900-NEXT: v_mov_b32_e32 v9, v11
-; GFX900-NEXT: v_mov_b32_e32 v2, v12
-; GFX900-NEXT: v_mov_b32_e32 v3, v13
-; GFX900-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v4
+; GFX900-NEXT: v_mov_b32_e32 v3, v5
+; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -11060,18 +10701,16 @@ define void @v_shuffle_v4i64_v4i64__7_2_6_6(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v10
-; GFX90A-NEXT: v_mov_b32_e32 v9, v11
-; GFX90A-NEXT: v_mov_b32_e32 v2, v12
-; GFX90A-NEXT: v_mov_b32_e32 v3, v13
-; GFX90A-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v4
+; GFX90A-NEXT: v_mov_b32_e32 v3, v5
+; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -11079,19 +10718,17 @@ define void @v_shuffle_v4i64_v4i64__7_2_6_6(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v10
-; GFX942-NEXT: v_mov_b32_e32 v9, v11
-; GFX942-NEXT: v_mov_b32_e32 v2, v12
-; GFX942-NEXT: v_mov_b32_e32 v3, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v4
+; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -11106,18 +10743,16 @@ define void @v_shuffle_v4i64_v4i64__7_3_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
-; GFX900-NEXT: v_mov_b32_e32 v10, v12
-; GFX900-NEXT: v_mov_b32_e32 v11, v13
-; GFX900-NEXT: v_mov_b32_e32 v4, v14
-; GFX900-NEXT: v_mov_b32_e32 v5, v15
-; GFX900-NEXT: global_store_dwordx4 v16, v[10:13], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v4
+; GFX900-NEXT: v_mov_b32_e32 v3, v5
+; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -11125,18 +10760,16 @@ define void @v_shuffle_v4i64_v4i64__7_3_6_6(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v10, v12
-; GFX90A-NEXT: v_mov_b32_e32 v11, v13
-; GFX90A-NEXT: v_mov_b32_e32 v4, v14
-; GFX90A-NEXT: v_mov_b32_e32 v5, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[10:13], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v4
+; GFX90A-NEXT: v_mov_b32_e32 v3, v5
+; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -11144,18 +10777,17 @@ define void @v_shuffle_v4i64_v4i64__7_3_6_6(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v12
-; GFX942-NEXT: v_mov_b32_e32 v11, v13
-; GFX942-NEXT: v_mov_b32_e32 v4, v14
-; GFX942-NEXT: v_mov_b32_e32 v5, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v4
+; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -11172,16 +10804,13 @@ define void @v_shuffle_v4i64_v4i64__7_4_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v4
; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v0
+; GFX900-NEXT: v_mov_b32_e32 v9, v1
+; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -11191,16 +10820,13 @@ define void @v_shuffle_v4i64_v4i64__7_4_6_6(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v4
; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v0
+; GFX90A-NEXT: v_mov_b32_e32 v9, v1
+; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -11210,16 +10836,13 @@ define void @v_shuffle_v4i64_v4i64__7_4_6_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v4
; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v0
+; GFX942-NEXT: v_mov_b32_e32 v9, v1
+; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -11233,54 +10856,56 @@ define void @v_shuffle_v4i64_v4i64__7_5_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v4i64__7_5_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: v_mov_b32_e32 v8, v4
; GFX900-NEXT: v_mov_b32_e32 v9, v5
; GFX900-NEXT: v_mov_b32_e32 v10, v4
; GFX900-NEXT: v_mov_b32_e32 v11, v5
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v2
+; GFX900-NEXT: v_mov_b32_e32 v9, v3
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v4i64__7_5_6_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v10, v4
-; GFX90A-NEXT: v_mov_b32_e32 v11, v5
; GFX90A-NEXT: v_mov_b32_e32 v12, v4
; GFX90A-NEXT: v_mov_b32_e32 v13, v5
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[10:13], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v14, v4
+; GFX90A-NEXT: v_mov_b32_e32 v15, v5
+; GFX90A-NEXT: v_mov_b32_e32 v8, v2
+; GFX90A-NEXT: v_mov_b32_e32 v9, v3
+; GFX90A-NEXT: global_store_dwordx4 v10, v[12:15], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_5_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v4
-; GFX942-NEXT: v_mov_b32_e32 v11, v5
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_mov_b32_e32 v12, v4
; GFX942-NEXT: v_mov_b32_e32 v13, v5
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[10:13], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v14, v4
+; GFX942-NEXT: v_mov_b32_e32 v15, v5
+; GFX942-NEXT: v_mov_b32_e32 v8, v2
+; GFX942-NEXT: v_mov_b32_e32 v9, v3
+; GFX942-NEXT: global_store_dwordx4 v10, v[12:15], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -11297,14 +10922,13 @@ define void @v_shuffle_v4i64_v4i64__7_7_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v4
; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -11314,14 +10938,13 @@ define void @v_shuffle_v4i64_v4i64__7_7_6_6(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v4
; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -11331,14 +10954,13 @@ define void @v_shuffle_v4i64_v4i64__7_7_6_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v4
; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -11352,45 +10974,42 @@ define void @v_shuffle_v4i64_v4i64__7_7_u_6(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v4i64__7_7_u_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v4i64__7_7_u_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_u_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -11405,18 +11024,19 @@ define void @v_shuffle_v4i64_v4i64__7_7_0_6(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:9]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: v_mov_b32_e32 v6, v8
-; GFX900-NEXT: v_mov_b32_e32 v7, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v10, v4
+; GFX900-NEXT: v_mov_b32_e32 v11, v5
+; GFX900-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -11424,18 +11044,19 @@ define void @v_shuffle_v4i64_v4i64__7_7_0_6(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:9]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v6, v8
-; GFX90A-NEXT: v_mov_b32_e32 v7, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v10, v4
+; GFX90A-NEXT: v_mov_b32_e32 v11, v5
+; GFX90A-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -11443,19 +11064,20 @@ define void @v_shuffle_v4i64_v4i64__7_7_0_6(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:9]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v6, v8
-; GFX942-NEXT: v_mov_b32_e32 v7, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v10, v4
+; GFX942-NEXT: v_mov_b32_e32 v11, v5
+; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -11470,18 +11092,19 @@ define void @v_shuffle_v4i64_v4i64__7_7_1_6(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v8
-; GFX900-NEXT: v_mov_b32_e32 v5, v9
-; GFX900-NEXT: v_mov_b32_e32 v8, v10
-; GFX900-NEXT: v_mov_b32_e32 v9, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v10, v4
+; GFX900-NEXT: v_mov_b32_e32 v11, v5
+; GFX900-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -11489,18 +11112,19 @@ define void @v_shuffle_v4i64_v4i64__7_7_1_6(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v8
-; GFX90A-NEXT: v_mov_b32_e32 v5, v9
-; GFX90A-NEXT: v_mov_b32_e32 v8, v10
-; GFX90A-NEXT: v_mov_b32_e32 v9, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v10, v4
+; GFX90A-NEXT: v_mov_b32_e32 v11, v5
+; GFX90A-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -11508,19 +11132,20 @@ define void @v_shuffle_v4i64_v4i64__7_7_1_6(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v8
-; GFX942-NEXT: v_mov_b32_e32 v5, v9
-; GFX942-NEXT: v_mov_b32_e32 v8, v10
-; GFX942-NEXT: v_mov_b32_e32 v9, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v10, v4
+; GFX942-NEXT: v_mov_b32_e32 v11, v5
+; GFX942-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -11535,18 +11160,19 @@ define void @v_shuffle_v4i64_v4i64__7_7_2_6(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v10
-; GFX900-NEXT: v_mov_b32_e32 v7, v11
-; GFX900-NEXT: v_mov_b32_e32 v10, v12
-; GFX900-NEXT: v_mov_b32_e32 v11, v13
-; GFX900-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v10, v4
+; GFX900-NEXT: v_mov_b32_e32 v11, v5
+; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -11554,18 +11180,19 @@ define void @v_shuffle_v4i64_v4i64__7_7_2_6(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v10
-; GFX90A-NEXT: v_mov_b32_e32 v7, v11
-; GFX90A-NEXT: v_mov_b32_e32 v10, v12
-; GFX90A-NEXT: v_mov_b32_e32 v11, v13
-; GFX90A-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v10, v4
+; GFX90A-NEXT: v_mov_b32_e32 v11, v5
+; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -11573,19 +11200,20 @@ define void @v_shuffle_v4i64_v4i64__7_7_2_6(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v10
-; GFX942-NEXT: v_mov_b32_e32 v7, v11
-; GFX942-NEXT: v_mov_b32_e32 v10, v12
-; GFX942-NEXT: v_mov_b32_e32 v11, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v10, v4
+; GFX942-NEXT: v_mov_b32_e32 v11, v5
+; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -11600,19 +11228,18 @@ define void @v_shuffle_v4i64_v4i64__7_7_3_6(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, v6
-; GFX900-NEXT: v_mov_b32_e32 v11, v7
-; GFX900-NEXT: global_store_dwordx4 v16, v[10:13], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v12, v14
-; GFX900-NEXT: v_mov_b32_e32 v13, v15
-; GFX900-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v12
+; GFX900-NEXT: v_mov_b32_e32 v9, v13
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
+; GFX900-NEXT: v_mov_b32_e32 v16, v14
+; GFX900-NEXT: v_mov_b32_e32 v17, v15
+; GFX900-NEXT: global_store_dwordx4 v0, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v0, v[14:17], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -11622,17 +11249,16 @@ define void @v_shuffle_v4i64_v4i64__7_7_3_6(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
+; GFX90A-NEXT: v_mov_b32_e32 v18, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v12
+; GFX90A-NEXT: v_mov_b32_e32 v9, v13
+; GFX90A-NEXT: v_mov_b32_e32 v16, v14
+; GFX90A-NEXT: v_mov_b32_e32 v17, v15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, v6
-; GFX90A-NEXT: v_mov_b32_e32 v11, v7
-; GFX90A-NEXT: global_store_dwordx4 v16, v[10:13], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v12, v14
-; GFX90A-NEXT: v_mov_b32_e32 v13, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v18, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v18, v[14:17], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -11642,18 +11268,16 @@ define void @v_shuffle_v4i64_v4i64__7_7_3_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: v_mov_b32_e32 v18, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v12
+; GFX942-NEXT: v_mov_b32_e32 v9, v13
+; GFX942-NEXT: v_mov_b32_e32 v16, v14
+; GFX942-NEXT: v_mov_b32_e32 v17, v15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v6
-; GFX942-NEXT: v_mov_b32_e32 v11, v7
-; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v12, v14
-; GFX942-NEXT: v_mov_b32_e32 v13, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v18, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v18, v[14:17], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -11670,13 +11294,13 @@ define void @v_shuffle_v4i64_v4i64__7_7_4_6(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v4
; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -11686,13 +11310,13 @@ define void @v_shuffle_v4i64_v4i64__7_7_4_6(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v4
; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -11702,13 +11326,13 @@ define void @v_shuffle_v4i64_v4i64__7_7_4_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v4
; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -11722,45 +11346,42 @@ define void @v_shuffle_v4i64_v4i64__7_7_5_6(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v4i64__7_7_5_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v4i64__7_7_5_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_5_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -11774,42 +11395,43 @@ define void @v_shuffle_v4i64_v4i64__u_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v4i64__u_7_7_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
-; GFX900-NEXT: s_waitcnt vmcnt(0)
-; GFX900-NEXT: s_setpc_b64 s[30:31]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v4i64__u_7_7_7:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v4i64__u_7_7_7:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -11824,18 +11446,18 @@ define void @v_shuffle_v4i64_v4i64__0_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[10:17]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v18, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:9]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v8
-; GFX900-NEXT: v_mov_b32_e32 v7, v9
-; GFX900-NEXT: v_mov_b32_e32 v2, v8
-; GFX900-NEXT: v_mov_b32_e32 v3, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: v_mov_b32_e32 v12, v6
+; GFX900-NEXT: v_mov_b32_e32 v13, v7
+; GFX900-NEXT: global_store_dwordx4 v18, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v18, v[10:13], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -11843,18 +11465,18 @@ define void @v_shuffle_v4i64_v4i64__0_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[10:17]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v18, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:9]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v8
-; GFX90A-NEXT: v_mov_b32_e32 v7, v9
-; GFX90A-NEXT: v_mov_b32_e32 v2, v8
-; GFX90A-NEXT: v_mov_b32_e32 v3, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: v_mov_b32_e32 v12, v6
+; GFX90A-NEXT: v_mov_b32_e32 v13, v7
+; GFX90A-NEXT: global_store_dwordx4 v18, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v18, v[10:13], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -11862,19 +11484,19 @@ define void @v_shuffle_v4i64_v4i64__0_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[10:17]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: v_mov_b32_e32 v18, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:9]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v8
-; GFX942-NEXT: v_mov_b32_e32 v7, v9
-; GFX942-NEXT: v_mov_b32_e32 v2, v8
-; GFX942-NEXT: v_mov_b32_e32 v3, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: v_mov_b32_e32 v12, v6
+; GFX942-NEXT: v_mov_b32_e32 v13, v7
+; GFX942-NEXT: global_store_dwordx4 v18, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v18, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -11889,18 +11511,18 @@ define void @v_shuffle_v4i64_v4i64__1_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v8, v10
-; GFX900-NEXT: v_mov_b32_e32 v9, v11
-; GFX900-NEXT: v_mov_b32_e32 v4, v10
-; GFX900-NEXT: v_mov_b32_e32 v5, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: v_mov_b32_e32 v12, v6
+; GFX900-NEXT: v_mov_b32_e32 v13, v7
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v16, v[10:13], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -11908,18 +11530,18 @@ define void @v_shuffle_v4i64_v4i64__1_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v10
-; GFX90A-NEXT: v_mov_b32_e32 v9, v11
-; GFX90A-NEXT: v_mov_b32_e32 v4, v10
-; GFX90A-NEXT: v_mov_b32_e32 v5, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: v_mov_b32_e32 v12, v6
+; GFX90A-NEXT: v_mov_b32_e32 v13, v7
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v16, v[10:13], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -11927,19 +11549,19 @@ define void @v_shuffle_v4i64_v4i64__1_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v10
-; GFX942-NEXT: v_mov_b32_e32 v9, v11
-; GFX942-NEXT: v_mov_b32_e32 v4, v10
-; GFX942-NEXT: v_mov_b32_e32 v5, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: v_mov_b32_e32 v12, v6
+; GFX942-NEXT: v_mov_b32_e32 v13, v7
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -11954,18 +11576,18 @@ define void @v_shuffle_v4i64_v4i64__2_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v14, 0
-; GFX900-NEXT: v_mov_b32_e32 v10, v12
-; GFX900-NEXT: v_mov_b32_e32 v11, v13
-; GFX900-NEXT: v_mov_b32_e32 v6, v12
-; GFX900-NEXT: v_mov_b32_e32 v7, v13
-; GFX900-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: v_mov_b32_e32 v12, v6
+; GFX900-NEXT: v_mov_b32_e32 v13, v7
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -11973,18 +11595,18 @@ define void @v_shuffle_v4i64_v4i64__2_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v14, 0
-; GFX90A-NEXT: v_mov_b32_e32 v10, v12
-; GFX90A-NEXT: v_mov_b32_e32 v11, v13
-; GFX90A-NEXT: v_mov_b32_e32 v6, v12
-; GFX90A-NEXT: v_mov_b32_e32 v7, v13
-; GFX90A-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17]
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: v_mov_b32_e32 v12, v6
+; GFX90A-NEXT: v_mov_b32_e32 v13, v7
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -11992,19 +11614,19 @@ define void @v_shuffle_v4i64_v4i64__2_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v12
-; GFX942-NEXT: v_mov_b32_e32 v11, v13
-; GFX942-NEXT: v_mov_b32_e32 v6, v12
-; GFX942-NEXT: v_mov_b32_e32 v7, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: v_mov_b32_e32 v12, v6
+; GFX942-NEXT: v_mov_b32_e32 v13, v7
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -12018,20 +11640,18 @@ define void @v_shuffle_v4i64_v4i64__3_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v4i64__3_7_7_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
-; GFX900-NEXT: v_mov_b32_e32 v12, v14
-; GFX900-NEXT: v_mov_b32_e32 v13, v15
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v12, v6
-; GFX900-NEXT: v_mov_b32_e32 v13, v7
-; GFX900-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -12041,17 +11661,16 @@ define void @v_shuffle_v4i64_v4i64__3_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v12, v14
-; GFX90A-NEXT: v_mov_b32_e32 v13, v15
+; GFX90A-NEXT: v_mov_b32_e32 v18, 0
+; GFX90A-NEXT: v_mov_b32_e32 v16, v14
+; GFX90A-NEXT: v_mov_b32_e32 v17, v15
+; GFX90A-NEXT: v_mov_b32_e32 v8, v14
+; GFX90A-NEXT: v_mov_b32_e32 v9, v15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v12, v6
-; GFX90A-NEXT: v_mov_b32_e32 v13, v7
-; GFX90A-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v18, v[14:17], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v18, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -12061,17 +11680,16 @@ define void @v_shuffle_v4i64_v4i64__3_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v14
-; GFX942-NEXT: v_mov_b32_e32 v13, v15
+; GFX942-NEXT: v_mov_b32_e32 v18, 0
+; GFX942-NEXT: v_mov_b32_e32 v16, v14
+; GFX942-NEXT: v_mov_b32_e32 v17, v15
+; GFX942-NEXT: v_mov_b32_e32 v8, v14
+; GFX942-NEXT: v_mov_b32_e32 v9, v15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v12, v6
-; GFX942-NEXT: v_mov_b32_e32 v13, v7
-; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v18, v[14:17], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v18, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -12088,13 +11706,13 @@ define void @v_shuffle_v4i64_v4i64__4_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
; GFX900-NEXT: v_mov_b32_e32 v2, v6
; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -12104,13 +11722,13 @@ define void @v_shuffle_v4i64_v4i64__4_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
; GFX90A-NEXT: v_mov_b32_e32 v2, v6
; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -12120,13 +11738,13 @@ define void @v_shuffle_v4i64_v4i64__4_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
; GFX942-NEXT: v_mov_b32_e32 v2, v6
; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -12143,11 +11761,13 @@ define void @v_shuffle_v4i64_v4i64__5_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
; GFX900-NEXT: v_mov_b32_e32 v4, v6
; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -12157,11 +11777,13 @@ define void @v_shuffle_v4i64_v4i64__5_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
; GFX90A-NEXT: v_mov_b32_e32 v4, v6
; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -12171,11 +11793,13 @@ define void @v_shuffle_v4i64_v4i64__5_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
; GFX942-NEXT: v_mov_b32_e32 v4, v6
; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -12189,48 +11813,43 @@ define void @v_shuffle_v4i64_v4i64__6_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v4i64__6_7_7_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v4i64__6_7_7_7:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v4i64__6_7_7_7:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -12244,48 +11863,43 @@ define void @v_shuffle_v4i64_v4i64__7_u_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v4i64__7_u_7_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v4i64__7_u_7_7:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_u_7_7:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -12299,64 +11913,61 @@ define void @v_shuffle_v4i64_v4i64__7_0_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v4i64__7_0_7_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v18, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17] offset:16
; GFX900-NEXT: v_mov_b32_e32 v8, v6
; GFX900-NEXT: v_mov_b32_e32 v9, v7
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[10:17]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, v0
-; GFX900-NEXT: v_mov_b32_e32 v11, v1
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v18, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v10
+; GFX900-NEXT: v_mov_b32_e32 v9, v11
+; GFX900-NEXT: global_store_dwordx4 v18, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v4i64__7_0_7_7:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v18, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:9]
+; GFX90A-NEXT: ; def v[10:17]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v8
-; GFX90A-NEXT: v_mov_b32_e32 v7, v9
-; GFX90A-NEXT: v_mov_b32_e32 v2, v8
-; GFX90A-NEXT: v_mov_b32_e32 v3, v9
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v18, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v10
+; GFX90A-NEXT: v_mov_b32_e32 v9, v11
+; GFX90A-NEXT: global_store_dwordx4 v18, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_0_7_7:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v18, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:9]
+; GFX942-NEXT: ; def v[10:17]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v8
-; GFX942-NEXT: v_mov_b32_e32 v7, v9
-; GFX942-NEXT: v_mov_b32_e32 v2, v8
-; GFX942-NEXT: v_mov_b32_e32 v3, v9
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v18, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v10
+; GFX942-NEXT: v_mov_b32_e32 v9, v11
+; GFX942-NEXT: global_store_dwordx4 v18, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -12371,18 +11982,19 @@ define void @v_shuffle_v4i64_v4i64__7_1_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
; GFX900-NEXT: v_mov_b32_e32 v8, v10
; GFX900-NEXT: v_mov_b32_e32 v9, v11
-; GFX900-NEXT: v_mov_b32_e32 v0, v10
-; GFX900-NEXT: v_mov_b32_e32 v1, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -12390,18 +12002,19 @@ define void @v_shuffle_v4i64_v4i64__7_1_7_7(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
; GFX90A-NEXT: v_mov_b32_e32 v8, v10
; GFX90A-NEXT: v_mov_b32_e32 v9, v11
-; GFX90A-NEXT: v_mov_b32_e32 v0, v10
-; GFX90A-NEXT: v_mov_b32_e32 v1, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -12409,19 +12022,20 @@ define void @v_shuffle_v4i64_v4i64__7_1_7_7(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_mov_b32_e32 v8, v10
; GFX942-NEXT: v_mov_b32_e32 v9, v11
-; GFX942-NEXT: v_mov_b32_e32 v0, v10
-; GFX942-NEXT: v_mov_b32_e32 v1, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -12436,18 +12050,19 @@ define void @v_shuffle_v4i64_v4i64__7_2_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v14, 0
-; GFX900-NEXT: v_mov_b32_e32 v10, v12
-; GFX900-NEXT: v_mov_b32_e32 v11, v13
-; GFX900-NEXT: v_mov_b32_e32 v2, v12
-; GFX900-NEXT: v_mov_b32_e32 v3, v13
-; GFX900-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v10
+; GFX900-NEXT: v_mov_b32_e32 v9, v11
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -12455,18 +12070,19 @@ define void @v_shuffle_v4i64_v4i64__7_2_7_7(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v14, 0
-; GFX90A-NEXT: v_mov_b32_e32 v10, v12
-; GFX90A-NEXT: v_mov_b32_e32 v11, v13
-; GFX90A-NEXT: v_mov_b32_e32 v2, v12
-; GFX90A-NEXT: v_mov_b32_e32 v3, v13
-; GFX90A-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v10
+; GFX90A-NEXT: v_mov_b32_e32 v9, v11
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -12474,19 +12090,20 @@ define void @v_shuffle_v4i64_v4i64__7_2_7_7(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v12
-; GFX942-NEXT: v_mov_b32_e32 v11, v13
-; GFX942-NEXT: v_mov_b32_e32 v2, v12
-; GFX942-NEXT: v_mov_b32_e32 v3, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v10
+; GFX942-NEXT: v_mov_b32_e32 v9, v11
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -12501,18 +12118,19 @@ define void @v_shuffle_v4i64_v4i64__7_3_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
-; GFX900-NEXT: v_mov_b32_e32 v12, v14
-; GFX900-NEXT: v_mov_b32_e32 v13, v15
-; GFX900-NEXT: v_mov_b32_e32 v4, v14
-; GFX900-NEXT: v_mov_b32_e32 v5, v15
-; GFX900-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v10
+; GFX900-NEXT: v_mov_b32_e32 v9, v11
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -12520,18 +12138,19 @@ define void @v_shuffle_v4i64_v4i64__7_3_7_7(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v12, v14
-; GFX90A-NEXT: v_mov_b32_e32 v13, v15
-; GFX90A-NEXT: v_mov_b32_e32 v4, v14
-; GFX90A-NEXT: v_mov_b32_e32 v5, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v10
+; GFX90A-NEXT: v_mov_b32_e32 v9, v11
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -12539,18 +12158,20 @@ define void @v_shuffle_v4i64_v4i64__7_3_7_7(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v14
-; GFX942-NEXT: v_mov_b32_e32 v13, v15
-; GFX942-NEXT: v_mov_b32_e32 v4, v14
-; GFX942-NEXT: v_mov_b32_e32 v5, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v10
+; GFX942-NEXT: v_mov_b32_e32 v9, v11
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -12564,54 +12185,52 @@ define void @v_shuffle_v4i64_v4i64__7_4_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v4i64__7_4_7_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v0
+; GFX900-NEXT: v_mov_b32_e32 v9, v1
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v4i64__7_4_7_7:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v0
+; GFX90A-NEXT: v_mov_b32_e32 v9, v1
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_4_7_7:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v0
+; GFX942-NEXT: v_mov_b32_e32 v9, v1
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -12625,48 +12244,52 @@ define void @v_shuffle_v4i64_v4i64__7_5_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v4i64__7_5_7_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v2
+; GFX900-NEXT: v_mov_b32_e32 v9, v3
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v4i64__7_5_7_7:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v2
+; GFX90A-NEXT: v_mov_b32_e32 v9, v3
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_5_7_7:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v2
+; GFX942-NEXT: v_mov_b32_e32 v9, v3
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -12680,48 +12303,52 @@ define void @v_shuffle_v4i64_v4i64__7_6_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v4i64__7_6_7_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v4
+; GFX900-NEXT: v_mov_b32_e32 v9, v5
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v4i64__7_6_7_7:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v4
+; GFX90A-NEXT: v_mov_b32_e32 v9, v5
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_6_7_7:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v4
+; GFX942-NEXT: v_mov_b32_e32 v9, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -12735,45 +12362,42 @@ define void @v_shuffle_v4i64_v4i64__7_7_u_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v4i64__7_7_u_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v4i64__7_7_u_7:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_u_7:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -12788,18 +12412,19 @@ define void @v_shuffle_v4i64_v4i64__7_7_0_7(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:9]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v8
-; GFX900-NEXT: v_mov_b32_e32 v3, v9
-; GFX900-NEXT: v_mov_b32_e32 v6, v8
-; GFX900-NEXT: v_mov_b32_e32 v7, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v10, v6
+; GFX900-NEXT: v_mov_b32_e32 v11, v7
+; GFX900-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -12807,18 +12432,19 @@ define void @v_shuffle_v4i64_v4i64__7_7_0_7(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:9]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v8
-; GFX90A-NEXT: v_mov_b32_e32 v3, v9
-; GFX90A-NEXT: v_mov_b32_e32 v6, v8
-; GFX90A-NEXT: v_mov_b32_e32 v7, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v10, v6
+; GFX90A-NEXT: v_mov_b32_e32 v11, v7
+; GFX90A-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -12826,19 +12452,20 @@ define void @v_shuffle_v4i64_v4i64__7_7_0_7(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:9]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v8
-; GFX942-NEXT: v_mov_b32_e32 v3, v9
-; GFX942-NEXT: v_mov_b32_e32 v6, v8
-; GFX942-NEXT: v_mov_b32_e32 v7, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v10, v6
+; GFX942-NEXT: v_mov_b32_e32 v11, v7
+; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -12853,18 +12480,19 @@ define void @v_shuffle_v4i64_v4i64__7_7_1_7(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v10
-; GFX900-NEXT: v_mov_b32_e32 v5, v11
-; GFX900-NEXT: v_mov_b32_e32 v8, v10
-; GFX900-NEXT: v_mov_b32_e32 v9, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v10, v6
+; GFX900-NEXT: v_mov_b32_e32 v11, v7
+; GFX900-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -12872,18 +12500,19 @@ define void @v_shuffle_v4i64_v4i64__7_7_1_7(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v10
-; GFX90A-NEXT: v_mov_b32_e32 v5, v11
-; GFX90A-NEXT: v_mov_b32_e32 v8, v10
-; GFX90A-NEXT: v_mov_b32_e32 v9, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v10, v6
+; GFX90A-NEXT: v_mov_b32_e32 v11, v7
+; GFX90A-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -12891,19 +12520,20 @@ define void @v_shuffle_v4i64_v4i64__7_7_1_7(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v10
-; GFX942-NEXT: v_mov_b32_e32 v5, v11
-; GFX942-NEXT: v_mov_b32_e32 v8, v10
-; GFX942-NEXT: v_mov_b32_e32 v9, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v10, v6
+; GFX942-NEXT: v_mov_b32_e32 v11, v7
+; GFX942-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -12918,37 +12548,39 @@ define void @v_shuffle_v4i64_v4i64__7_7_2_7(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v12
-; GFX900-NEXT: v_mov_b32_e32 v7, v13
-; GFX900-NEXT: v_mov_b32_e32 v10, v12
-; GFX900-NEXT: v_mov_b32_e32 v11, v13
-; GFX900-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
-; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, v6
+; GFX900-NEXT: v_mov_b32_e32 v11, v7
+; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v4i64__7_7_2_7:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v12
-; GFX90A-NEXT: v_mov_b32_e32 v7, v13
-; GFX90A-NEXT: v_mov_b32_e32 v10, v12
-; GFX90A-NEXT: v_mov_b32_e32 v11, v13
-; GFX90A-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v10, v6
+; GFX90A-NEXT: v_mov_b32_e32 v11, v7
+; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -12956,19 +12588,20 @@ define void @v_shuffle_v4i64_v4i64__7_7_2_7(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v12
-; GFX942-NEXT: v_mov_b32_e32 v7, v13
-; GFX942-NEXT: v_mov_b32_e32 v10, v12
-; GFX942-NEXT: v_mov_b32_e32 v11, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v10, v6
+; GFX942-NEXT: v_mov_b32_e32 v11, v7
+; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -12983,19 +12616,18 @@ define void @v_shuffle_v4i64_v4i64__7_7_3_7(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, v6
-; GFX900-NEXT: v_mov_b32_e32 v13, v7
-; GFX900-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v12, v14
-; GFX900-NEXT: v_mov_b32_e32 v13, v15
-; GFX900-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v14
+; GFX900-NEXT: v_mov_b32_e32 v9, v15
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
+; GFX900-NEXT: v_mov_b32_e32 v16, v14
+; GFX900-NEXT: v_mov_b32_e32 v17, v15
+; GFX900-NEXT: global_store_dwordx4 v0, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v0, v[14:17], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -13005,17 +12637,16 @@ define void @v_shuffle_v4i64_v4i64__7_7_3_7(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
+; GFX90A-NEXT: v_mov_b32_e32 v18, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v14
+; GFX90A-NEXT: v_mov_b32_e32 v9, v15
+; GFX90A-NEXT: v_mov_b32_e32 v16, v14
+; GFX90A-NEXT: v_mov_b32_e32 v17, v15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, v6
-; GFX90A-NEXT: v_mov_b32_e32 v13, v7
-; GFX90A-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v12, v14
-; GFX90A-NEXT: v_mov_b32_e32 v13, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v18, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v18, v[14:17], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -13025,18 +12656,16 @@ define void @v_shuffle_v4i64_v4i64__7_7_3_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: v_mov_b32_e32 v18, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v14
+; GFX942-NEXT: v_mov_b32_e32 v9, v15
+; GFX942-NEXT: v_mov_b32_e32 v16, v14
+; GFX942-NEXT: v_mov_b32_e32 v17, v15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v6
-; GFX942-NEXT: v_mov_b32_e32 v13, v7
-; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v12, v14
-; GFX942-NEXT: v_mov_b32_e32 v13, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v18, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v18, v[14:17], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -13053,13 +12682,13 @@ define void @v_shuffle_v4i64_v4i64__7_7_4_7(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v6
; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -13069,13 +12698,13 @@ define void @v_shuffle_v4i64_v4i64__7_7_4_7(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v6
; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -13085,13 +12714,13 @@ define void @v_shuffle_v4i64_v4i64__7_7_4_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v6
; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -13108,11 +12737,13 @@ define void @v_shuffle_v4i64_v4i64__7_7_5_7(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: v_mov_b32_e32 v4, v6
; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -13122,11 +12753,13 @@ define void @v_shuffle_v4i64_v4i64__7_7_5_7(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: v_mov_b32_e32 v4, v6
; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -13136,11 +12769,13 @@ define void @v_shuffle_v4i64_v4i64__7_7_5_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: v_mov_b32_e32 v4, v6
; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -13154,45 +12789,42 @@ define void @v_shuffle_v4i64_v4i64__7_7_6_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4i64_v4i64__7_7_6_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4i64_v4i64__7_7_6_7:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_7_6_7:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=v"()
@@ -13328,10 +12960,9 @@ define void @s_shuffle_v4i64_v4i64__2_u_u_u() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13474,10 +13105,9 @@ define void @s_shuffle_v4i64_v4i64__6_u_u_u() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13750,15 +13380,14 @@ define void @s_shuffle_v4i64_v4i64__7_3_u_u() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13895,242 +13524,435 @@ define void @s_shuffle_v4i64_v4i64__7_6_u_u() {
}
define void @s_shuffle_v4i64_v4i64__7_7_u_u() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_u_u:
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_7_u_u:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 poison>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_7_0_u() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_7_0_u:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 poison>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_7_1_u() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_1_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_u_u:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_1_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_u_u:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_1_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s8, s10
+; GFX942-NEXT: s_mov_b32 s9, s11
+; GFX942-NEXT: s_mov_b32 s13, s3
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 poison>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 poison>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_0_u() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_0_u:
+define void @s_shuffle_v4i64_v4i64__7_7_2_u() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_2_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_0_u:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_2_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_0_u:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_2_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_mov_b32 s8, s10
; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s1
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 poison>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 poison>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_1_u() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_1_u:
+define void @s_shuffle_v4i64_v4i64__7_7_3_u() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_3_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_1_u:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_3_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_1_u:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_3_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: s_mov_b32 s12, s14
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_mov_b32 s8, s10
; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b32 s13, s15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 poison>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 poison>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_2_u() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_2_u:
+define void @s_shuffle_v4i64_v4i64__7_7_4_u() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_7_4_u:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s12, s4
+; GFX9-NEXT: s_mov_b32 s13, s5
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 poison>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_7_5_u() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_7_5_u:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s12, s6
+; GFX9-NEXT: s_mov_b32 s13, s7
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 poison>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_7_6_u() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_7_6_u:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s14
+; GFX9-NEXT: s_mov_b32 s9, s15
+; GFX9-NEXT: s_mov_b32 s10, s14
+; GFX9-NEXT: s_mov_b32 s11, s15
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 poison>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_7_7_u() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_7_7_u:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 poison>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_7_7_0() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_7_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[16:23]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_2_u:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_7_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[16:23]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_2_u:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_7_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s14, s0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b32 s8, s10
+; GFX942-NEXT: s_mov_b32 s9, s11
+; GFX942-NEXT: s_mov_b32 s12, s10
+; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b32 s15, s1
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 poison>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 0>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_3_u() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_3_u:
+define void @s_shuffle_v4i64_v4i64__7_7_7_1() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_7_7_1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_7_7_2() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_7_7_2:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: s_mov_b32 s14, s16
+; GFX9-NEXT: s_mov_b32 s15, s17
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_7_7_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_7_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -14141,14 +13963,14 @@ define void @s_shuffle_v4i64_v4i64__7_7_3_u() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_3_u:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_7_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -14159,3681 +13981,180 @@ define void @s_shuffle_v4i64_v4i64__7_7_3_u() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_3_u:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_7_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b32 s8, s10
+; GFX942-NEXT: s_mov_b32 s9, s11
+; GFX942-NEXT: s_mov_b32 s12, s10
+; GFX942-NEXT: s_mov_b32 s13, s11
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 poison>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_4_u() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_4_u:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_4_u:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_4_u:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 poison>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__7_7_5_u() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_5_u:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_5_u:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_5_u:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 poison>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__7_7_6_u() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_7_6_u:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 poison>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__7_7_7_u() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_7_u:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_7_u:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_7_u:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 poison>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__7_7_7_0() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_7_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_7_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_7_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 0>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__7_7_7_1() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_7_1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_7_1:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_7_1:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__7_7_7_2() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_7_2:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_7_2:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_7_2:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 2>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__7_7_7_3() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_7_3:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s10, s22
-; GFX900-NEXT: s_mov_b32 s11, s23
-; GFX900-NEXT: s_mov_b32 s12, s22
-; GFX900-NEXT: s_mov_b32 s13, s23
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_7_3:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s10, s22
-; GFX90A-NEXT: s_mov_b32 s11, s23
-; GFX90A-NEXT: s_mov_b32 s12, s22
-; GFX90A-NEXT: s_mov_b32 s13, s23
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_7_3:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 3>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__7_7_7_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_7_4:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s10, s22
-; GFX900-NEXT: s_mov_b32 s11, s23
-; GFX900-NEXT: s_mov_b32 s12, s22
-; GFX900-NEXT: s_mov_b32 s13, s23
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_7_4:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s10, s22
-; GFX90A-NEXT: s_mov_b32 s11, s23
-; GFX90A-NEXT: s_mov_b32 s12, s22
-; GFX90A-NEXT: s_mov_b32 s13, s23
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_7_4:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 4>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__7_7_7_5() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_7_5:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s18
-; GFX900-NEXT: s_mov_b32 s11, s19
-; GFX900-NEXT: s_mov_b32 s12, s18
-; GFX900-NEXT: s_mov_b32 s13, s19
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_7_5:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s18
-; GFX90A-NEXT: s_mov_b32 s11, s19
-; GFX90A-NEXT: s_mov_b32 s12, s18
-; GFX90A-NEXT: s_mov_b32 s13, s19
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_7_5:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 5>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__7_7_7_6() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_7_6:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s18
-; GFX900-NEXT: s_mov_b32 s11, s19
-; GFX900-NEXT: s_mov_b32 s12, s18
-; GFX900-NEXT: s_mov_b32 s13, s19
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_7_6:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s18
-; GFX90A-NEXT: s_mov_b32 s11, s19
-; GFX90A-NEXT: s_mov_b32 s12, s18
-; GFX90A-NEXT: s_mov_b32 s13, s19
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_7_6:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 6>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__7_7_7_7() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_7_7_7:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__u_0_0_0() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__u_0_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__u_0_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__u_0_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 poison, i32 0, i32 0, i32 0>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__0_0_0_0() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__0_0_0_0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s8
-; GFX9-NEXT: s_mov_b32 s11, s9
-; GFX9-NEXT: s_mov_b32 s12, s8
-; GFX9-NEXT: s_mov_b32 s13, s9
-; GFX9-NEXT: s_mov_b32 s14, s8
-; GFX9-NEXT: s_mov_b32 s15, s9
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> zeroinitializer
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__1_0_0_0() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__1_0_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__1_0_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__1_0_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__2_0_0_0() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__2_0_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__2_0_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__2_0_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 2, i32 0, i32 0, i32 0>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__3_0_0_0() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__3_0_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__3_0_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__3_0_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 3, i32 0, i32 0, i32 0>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__4_0_0_0() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__4_0_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__4_0_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__4_0_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 4, i32 0, i32 0, i32 0>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__5_0_0_0() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__5_0_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__5_0_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__5_0_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 5, i32 0, i32 0, i32 0>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__6_0_0_0() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__6_0_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__6_0_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__6_0_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 6, i32 0, i32 0, i32 0>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__7_0_0_0() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_0_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_0_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_0_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 0, i32 0, i32 0>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__7_u_0_0() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_u_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_u_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_u_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 poison, i32 0, i32 0>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__7_1_0_0() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_1_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s6
-; GFX900-NEXT: s_mov_b32 s11, s7
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_1_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s6
-; GFX90A-NEXT: s_mov_b32 s11, s7
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_1_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 1, i32 0, i32 0>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__7_2_0_0() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_2_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s20
-; GFX900-NEXT: s_mov_b32 s11, s21
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_2_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s20
-; GFX90A-NEXT: s_mov_b32 s11, s21
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_2_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 2, i32 0, i32 0>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__7_3_0_0() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_3_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_3_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_3_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 3, i32 0, i32 0>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__7_4_0_0() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_4_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_4_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_4_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 4, i32 0, i32 0>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__7_5_0_0() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_5_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_5_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_5_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 5, i32 0, i32 0>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__7_6_0_0() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_6_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_6_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_6_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 6, i32 0, i32 0>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__7_7_0_0() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 0>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__7_7_u_0() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_u_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_u_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_u_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 0>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__7_7_1_0() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_1_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_1_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_1_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 0>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__7_7_2_0() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_2_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s20
-; GFX900-NEXT: s_mov_b32 s13, s21
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_2_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s20
-; GFX90A-NEXT: s_mov_b32 s13, s21
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_2_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 0>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__7_7_3_0() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_3_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s22
-; GFX900-NEXT: s_mov_b32 s13, s23
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_3_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s22
-; GFX90A-NEXT: s_mov_b32 s13, s23
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_3_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 0>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__7_7_4_0() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_4_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s18
-; GFX900-NEXT: s_mov_b32 s11, s19
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_4_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s18
-; GFX90A-NEXT: s_mov_b32 s11, s19
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_4_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 0>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__7_7_5_0() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_5_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s18
-; GFX900-NEXT: s_mov_b32 s11, s19
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_5_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s18
-; GFX90A-NEXT: s_mov_b32 s11, s19
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_5_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 0>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__7_7_6_0() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_6_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_6_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_6_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 0>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__u_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__u_1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 poison, i32 1, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__0_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__0_1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__1_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__1_1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__2_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__2_1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 2, i32 1, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__3_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__3_1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 3, i32 1, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__4_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__4_1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 4, i32 1, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__5_1_1_1() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__5_1_1_1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s12, s10
-; GFX900-NEXT: s_mov_b32 s13, s11
-; GFX900-NEXT: s_mov_b32 s14, s10
-; GFX900-NEXT: s_mov_b32 s15, s11
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__5_1_1_1:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s12, s10
-; GFX90A-NEXT: s_mov_b32 s13, s11
-; GFX90A-NEXT: s_mov_b32 s14, s10
-; GFX90A-NEXT: s_mov_b32 s15, s11
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__5_1_1_1:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 5, i32 1, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__6_1_1_1() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__6_1_1_1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s12, s10
-; GFX900-NEXT: s_mov_b32 s13, s11
-; GFX900-NEXT: s_mov_b32 s14, s10
-; GFX900-NEXT: s_mov_b32 s15, s11
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__6_1_1_1:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s12, s10
-; GFX90A-NEXT: s_mov_b32 s13, s11
-; GFX90A-NEXT: s_mov_b32 s14, s10
-; GFX90A-NEXT: s_mov_b32 s15, s11
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__6_1_1_1:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 6, i32 1, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__7_1_1_1() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_1_1_1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s12, s10
-; GFX900-NEXT: s_mov_b32 s13, s11
-; GFX900-NEXT: s_mov_b32 s14, s10
-; GFX900-NEXT: s_mov_b32 s15, s11
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_1_1_1:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s12, s10
-; GFX90A-NEXT: s_mov_b32 s13, s11
-; GFX90A-NEXT: s_mov_b32 s14, s10
-; GFX90A-NEXT: s_mov_b32 s15, s11
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_1_1_1:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 1, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__7_u_1_1() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_u_1_1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_u_1_1:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_u_1_1:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 poison, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__7_0_1_1() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_0_1_1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_0_1_1:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_0_1_1:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 0, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__7_2_1_1() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_2_1_1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_2_1_1:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_2_1_1:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 2, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__7_3_1_1() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_3_1_1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_3_1_1:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_3_1_1:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 3, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__7_4_1_1() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_4_1_1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_4_1_1:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_4_1_1:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 4, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__7_5_1_1() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_5_1_1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_5_1_1:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_5_1_1:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 5, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__7_6_1_1() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_6_1_1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_6_1_1:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_6_1_1:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 6, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__7_7_1_1() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_1_1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_1_1:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_1_1:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__7_7_u_1() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_u_1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_u_1:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_u_1:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s15, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__7_7_0_1() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_0_1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_0_1:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_0_1:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__7_7_2_1() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_2_1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_2_1:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_2_1:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__7_7_3_1() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_3_1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s18
-; GFX900-NEXT: s_mov_b32 s13, s19
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_3_1:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s18
-; GFX90A-NEXT: s_mov_b32 s13, s19
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_3_1:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+define void @s_shuffle_v4i64_v4i64__7_7_7_4() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_7_7_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: s_mov_b32 s14, s4
+; GFX9-NEXT: s_mov_b32 s15, s5
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 1>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_4_1() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_4_1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s18
-; GFX900-NEXT: s_mov_b32 s11, s19
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_4_1:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s18
-; GFX90A-NEXT: s_mov_b32 s11, s19
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_4_1:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s15, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+define void @s_shuffle_v4i64_v4i64__7_7_7_5() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_7_7_5:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: s_mov_b32 s14, s6
+; GFX9-NEXT: s_mov_b32 s15, s7
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 1>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_5_1() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_5_1:
+define void @s_shuffle_v4i64_v4i64__7_7_7_6() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_7_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s18
; GFX900-NEXT: s_mov_b32 s9, s19
; GFX900-NEXT: s_mov_b32 s10, s18
; GFX900-NEXT: s_mov_b32 s11, s19
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
+; GFX900-NEXT: s_mov_b32 s12, s18
+; GFX900-NEXT: s_mov_b32 s13, s19
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_5_1:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_7_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s18
; GFX90A-NEXT: s_mov_b32 s9, s19
; GFX90A-NEXT: s_mov_b32 s10, s18
; GFX90A-NEXT: s_mov_b32 s11, s19
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
+; GFX90A-NEXT: s_mov_b32 s12, s18
+; GFX90A-NEXT: s_mov_b32 s13, s19
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_5_1:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_7_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s10, s6
+; GFX942-NEXT: s_mov_b32 s11, s7
; GFX942-NEXT: s_mov_b32 s12, s6
; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b32 s14, s4
+; GFX942-NEXT: s_mov_b32 s15, s5
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 1>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 6>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_6_1() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_6_1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_6_1:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_6_1:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+define void @s_shuffle_v4i64_v4i64__7_7_7_7() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_7_7_7:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: s_mov_b32 s14, s10
+; GFX9-NEXT: s_mov_b32 s15, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 1>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__u_2_2_2() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__u_2_2_2:
+define void @s_shuffle_v4i64_v4i64__u_0_0_0() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__u_0_0_0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ; def s[12:19]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_mov_b32 s10, s12
; GFX9-NEXT: s_mov_b32 s11, s13
@@ -17844,41 +14165,43 @@ define void @s_shuffle_v4i64_v4i64__u_2_2_2() {
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 poison, i32 2, i32 2, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 poison, i32 0, i32 0, i32 0>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__0_2_2_2() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__0_2_2_2:
+define void @s_shuffle_v4i64_v4i64__0_0_0_0() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__0_0_0_0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; def s[8:15]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s12, s8
+; GFX9-NEXT: s_mov_b32 s13, s9
+; GFX9-NEXT: s_mov_b32 s14, s8
+; GFX9-NEXT: s_mov_b32 s15, s9
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:15]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 2, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> zeroinitializer
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__1_2_2_2() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__1_2_2_2:
+define void @s_shuffle_v4i64_v4i64__1_0_0_0() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__1_0_0_0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ; def s[12:19]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s8, s14
+; GFX9-NEXT: s_mov_b32 s9, s15
; GFX9-NEXT: s_mov_b32 s10, s12
; GFX9-NEXT: s_mov_b32 s11, s13
; GFX9-NEXT: s_mov_b32 s14, s12
@@ -17888,43 +14211,43 @@ define void @s_shuffle_v4i64_v4i64__1_2_2_2() {
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 1, i32 2, i32 2, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__2_2_2_2() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__2_2_2_2:
+define void @s_shuffle_v4i64_v4i64__2_0_0_0() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__2_0_0_0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ; def s[4:11]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
+; GFX9-NEXT: s_mov_b32 s10, s4
+; GFX9-NEXT: s_mov_b32 s11, s5
+; GFX9-NEXT: s_mov_b32 s12, s4
+; GFX9-NEXT: s_mov_b32 s13, s5
+; GFX9-NEXT: s_mov_b32 s14, s4
+; GFX9-NEXT: s_mov_b32 s15, s5
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:15]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 2, i32 0, i32 0, i32 0>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__3_2_2_2() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__3_2_2_2:
+define void @s_shuffle_v4i64_v4i64__3_0_0_0() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__3_0_0_0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ; def s[12:19]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
+; GFX9-NEXT: s_mov_b32 s8, s18
+; GFX9-NEXT: s_mov_b32 s9, s19
; GFX9-NEXT: s_mov_b32 s10, s12
; GFX9-NEXT: s_mov_b32 s11, s13
; GFX9-NEXT: s_mov_b32 s14, s12
@@ -17934,17 +14257,17 @@ define void @s_shuffle_v4i64_v4i64__3_2_2_2() {
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 2, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 3, i32 0, i32 0, i32 0>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__4_2_2_2() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__4_2_2_2:
+define void @s_shuffle_v4i64_v4i64__4_0_0_0() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__4_0_0_0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ; def s[12:19]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_mov_b32 s10, s12
; GFX9-NEXT: s_mov_b32 s11, s13
@@ -17955,17 +14278,17 @@ define void @s_shuffle_v4i64_v4i64__4_2_2_2() {
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 4, i32 2, i32 2, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 4, i32 0, i32 0, i32 0>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__5_2_2_2() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__5_2_2_2:
+define void @s_shuffle_v4i64_v4i64__5_0_0_0() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__5_0_0_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
@@ -17981,11 +14304,11 @@ define void @s_shuffle_v4i64_v4i64__5_2_2_2() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__5_2_2_2:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__5_0_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
@@ -18001,11 +14324,11 @@ define void @s_shuffle_v4i64_v4i64__5_2_2_2() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__5_2_2_2:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__5_0_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -18022,21 +14345,48 @@ define void @s_shuffle_v4i64_v4i64__5_2_2_2() {
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 5, i32 2, i32 2, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 5, i32 0, i32 0, i32 0>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__6_2_2_2() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__6_2_2_2:
+define void @s_shuffle_v4i64_v4i64__6_0_0_0() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__6_0_0_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: s_mov_b32 s14, s12
+; GFX9-NEXT: s_mov_b32 s15, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 6, i32 0, i32 0, i32 0>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_0_0_0() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_0_0_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
; GFX900-NEXT: s_mov_b32 s10, s12
; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: s_mov_b32 s14, s12
@@ -18046,15 +14396,17 @@ define void @s_shuffle_v4i64_v4i64__6_2_2_2() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__6_2_2_2:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_0_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
; GFX90A-NEXT: s_mov_b32 s10, s12
; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: s_mov_b32 s14, s12
@@ -18064,17 +14416,17 @@ define void @s_shuffle_v4i64_v4i64__6_2_2_2() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__6_2_2_2:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_0_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
; GFX942-NEXT: s_mov_b32 s10, s12
; GFX942-NEXT: s_mov_b32 s11, s13
; GFX942-NEXT: s_mov_b32 s14, s12
@@ -18085,25 +14437,23 @@ define void @s_shuffle_v4i64_v4i64__6_2_2_2() {
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 6, i32 2, i32 2, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 0, i32 0, i32 0>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_2_2_2() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_2_2_2:
+define void @s_shuffle_v4i64_v4i64__7_u_0_0() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_u_0_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: s_mov_b32 s14, s12
; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
@@ -18111,19 +14461,17 @@ define void @s_shuffle_v4i64_v4i64__7_2_2_2() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_2_2_2:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_u_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: s_mov_b32 s14, s12
; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
@@ -18131,19 +14479,17 @@ define void @s_shuffle_v4i64_v4i64__7_2_2_2() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_2_2_2:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_u_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_mov_b32 s8, s6
; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
; GFX942-NEXT: s_mov_b32 s14, s12
; GFX942-NEXT: s_mov_b32 s15, s13
; GFX942-NEXT: ;;#ASMSTART
@@ -18152,23 +14498,186 @@ define void @s_shuffle_v4i64_v4i64__7_2_2_2() {
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 2, i32 2, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 poison, i32 0, i32 0>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_1_0_0() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_1_0_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_1_0_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_1_0_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s10, s14
+; GFX942-NEXT: s_mov_b32 s11, s15
+; GFX942-NEXT: s_mov_b32 s14, s12
+; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 1, i32 0, i32 0>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_2_0_0() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_2_0_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_2_0_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_2_0_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s10, s16
+; GFX942-NEXT: s_mov_b32 s11, s17
+; GFX942-NEXT: s_mov_b32 s14, s12
+; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 2, i32 0, i32 0>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_3_0_0() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_3_0_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s18
+; GFX9-NEXT: s_mov_b32 s9, s19
+; GFX9-NEXT: s_mov_b32 s12, s4
+; GFX9-NEXT: s_mov_b32 s13, s5
+; GFX9-NEXT: s_mov_b32 s14, s4
+; GFX9-NEXT: s_mov_b32 s15, s5
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 3, i32 0, i32 0>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_u_2_2() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_u_2_2:
+define void @s_shuffle_v4i64_v4i64__7_4_0_0() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_4_0_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: s_mov_b32 s14, s12
; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
@@ -18176,17 +14685,19 @@ define void @s_shuffle_v4i64_v4i64__7_u_2_2() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_u_2_2:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_4_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: s_mov_b32 s14, s12
; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
@@ -18194,17 +14705,19 @@ define void @s_shuffle_v4i64_v4i64__7_u_2_2() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_u_2_2:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_4_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_mov_b32 s8, s6
; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b32 s11, s1
; GFX942-NEXT: s_mov_b32 s14, s12
; GFX942-NEXT: s_mov_b32 s15, s13
; GFX942-NEXT: ;;#ASMSTART
@@ -18213,57 +14726,53 @@ define void @s_shuffle_v4i64_v4i64__7_u_2_2() {
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 poison, i32 2, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 4, i32 0, i32 0>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_0_2_2() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_0_2_2:
+define void @s_shuffle_v4i64_v4i64__7_5_0_0() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_5_0_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s14, s4
+; GFX900-NEXT: s_mov_b32 s15, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_0_2_2:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_5_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s14, s4
+; GFX90A-NEXT: s_mov_b32 s15, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_0_2_2:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_5_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -18274,35 +14783,35 @@ define void @s_shuffle_v4i64_v4i64__7_0_2_2() {
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_mov_b32 s8, s14
; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b32 s14, s0
+; GFX942-NEXT: s_mov_b32 s15, s1
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 0, i32 2, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 5, i32 0, i32 0>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_1_2_2() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_1_2_2:
+define void @s_shuffle_v4i64_v4i64__7_6_0_0() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_6_0_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[16:23]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s22
; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s10, s20
+; GFX900-NEXT: s_mov_b32 s11, s21
; GFX900-NEXT: s_mov_b32 s14, s12
; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
@@ -18310,17 +14819,19 @@ define void @s_shuffle_v4i64_v4i64__7_1_2_2() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_1_2_2:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_6_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[16:23]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s22
; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s10, s20
+; GFX90A-NEXT: s_mov_b32 s11, s21
; GFX90A-NEXT: s_mov_b32 s14, s12
; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
@@ -18328,17 +14839,19 @@ define void @s_shuffle_v4i64_v4i64__7_1_2_2() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_1_2_2:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_6_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_mov_b32 s8, s6
; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s10, s4
+; GFX942-NEXT: s_mov_b32 s11, s5
; GFX942-NEXT: s_mov_b32 s14, s12
; GFX942-NEXT: s_mov_b32 s15, s13
; GFX942-NEXT: ;;#ASMSTART
@@ -18347,25 +14860,48 @@ define void @s_shuffle_v4i64_v4i64__7_1_2_2() {
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 1, i32 2, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 6, i32 0, i32 0>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_3_2_2() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_3_2_2:
+define void @s_shuffle_v4i64_v4i64__7_7_0_0() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_7_0_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s14, s12
+; GFX9-NEXT: s_mov_b32 s15, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 0>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_7_u_0() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_u_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
; GFX900-NEXT: s_mov_b32 s14, s12
; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
@@ -18373,19 +14909,17 @@ define void @s_shuffle_v4i64_v4i64__7_3_2_2() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_3_2_2:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_u_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
; GFX90A-NEXT: s_mov_b32 s14, s12
; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
@@ -18393,113 +14927,165 @@ define void @s_shuffle_v4i64_v4i64__7_3_2_2() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_3_2_2:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_u_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s14, s0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b32 s8, s10
+; GFX942-NEXT: s_mov_b32 s9, s11
+; GFX942-NEXT: s_mov_b32 s15, s1
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 3, i32 2, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 0>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_4_2_2() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_4_2_2:
+define void @s_shuffle_v4i64_v4i64__7_7_1_0() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_1_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[16:23]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s14, s12
-; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: s_mov_b32 s12, s18
+; GFX900-NEXT: s_mov_b32 s13, s19
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_4_2_2:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_1_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[16:23]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s14, s12
-; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: s_mov_b32 s12, s18
+; GFX90A-NEXT: s_mov_b32 s13, s19
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_4_2_2:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_1_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s12, s2
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b32 s8, s10
+; GFX942-NEXT: s_mov_b32 s9, s11
+; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b32 s14, s0
+; GFX942-NEXT: s_mov_b32 s15, s1
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 4, i32 2, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 0>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_7_2_0() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_7_2_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[16:23]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s12, s20
+; GFX9-NEXT: s_mov_b32 s13, s21
+; GFX9-NEXT: s_mov_b32 s14, s16
+; GFX9-NEXT: s_mov_b32 s15, s17
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 0>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_7_3_0() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_7_3_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[16:23]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s12, s22
+; GFX9-NEXT: s_mov_b32 s13, s23
+; GFX9-NEXT: s_mov_b32 s14, s16
+; GFX9-NEXT: s_mov_b32 s15, s17
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 0>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_5_2_2() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_5_2_2:
+define void @s_shuffle_v4i64_v4i64__7_7_4_0() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_4_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[16:23]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
; GFX900-NEXT: s_mov_b32 s14, s16
; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
@@ -18507,19 +15093,19 @@ define void @s_shuffle_v4i64_v4i64__7_5_2_2() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_5_2_2:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_4_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[16:23]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
; GFX90A-NEXT: s_mov_b32 s14, s16
; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
@@ -18527,137 +15113,141 @@ define void @s_shuffle_v4i64_v4i64__7_5_2_2() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_5_2_2:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_4_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s14, s0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
+; GFX942-NEXT: s_mov_b32 s8, s10
+; GFX942-NEXT: s_mov_b32 s9, s11
; GFX942-NEXT: s_mov_b32 s12, s4
; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b32 s15, s1
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 5, i32 2, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 0>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_6_2_2() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_6_2_2:
+define void @s_shuffle_v4i64_v4i64__7_7_5_0() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_5_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[16:23]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s10, s20
-; GFX900-NEXT: s_mov_b32 s11, s21
-; GFX900-NEXT: s_mov_b32 s14, s12
-; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_6_2_2:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_5_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[16:23]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s10, s20
-; GFX90A-NEXT: s_mov_b32 s11, s21
-; GFX90A-NEXT: s_mov_b32 s14, s12
-; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_6_2_2:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_5_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s14, s0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b32 s8, s10
+; GFX942-NEXT: s_mov_b32 s9, s11
+; GFX942-NEXT: s_mov_b32 s12, s6
+; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b32 s15, s1
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 6, i32 2, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 0>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_2_2() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_2_2:
+define void @s_shuffle_v4i64_v4i64__7_7_6_0() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_6_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s14, s12
-; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s14, s4
+; GFX900-NEXT: s_mov_b32 s15, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_2_2:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_6_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s14, s12
-; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s14, s4
+; GFX90A-NEXT: s_mov_b32 s15, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_2_2:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_6_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -18666,124 +15256,197 @@ define void @s_shuffle_v4i64_v4i64__7_7_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b32 s8, s14
+; GFX942-NEXT: s_mov_b32 s9, s15
+; GFX942-NEXT: s_mov_b32 s10, s14
+; GFX942-NEXT: s_mov_b32 s11, s15
+; GFX942-NEXT: s_mov_b32 s14, s0
+; GFX942-NEXT: s_mov_b32 s15, s1
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 2>
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 0>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__u_1_1_1() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__u_1_1_1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: s_mov_b32 s14, s10
+; GFX9-NEXT: s_mov_b32 s15, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 poison, i32 1, i32 1, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__0_1_1_1() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__0_1_1_1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: s_mov_b32 s14, s10
+; GFX9-NEXT: s_mov_b32 s15, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__1_1_1_1() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__1_1_1_1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: s_mov_b32 s14, s10
+; GFX9-NEXT: s_mov_b32 s15, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__2_1_1_1() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__2_1_1_1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s6
+; GFX9-NEXT: s_mov_b32 s11, s7
+; GFX9-NEXT: s_mov_b32 s12, s6
+; GFX9-NEXT: s_mov_b32 s13, s7
+; GFX9-NEXT: s_mov_b32 s14, s6
+; GFX9-NEXT: s_mov_b32 s15, s7
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 2, i32 1, i32 1, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__3_1_1_1() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__3_1_1_1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s14
+; GFX9-NEXT: s_mov_b32 s9, s15
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: s_mov_b32 s14, s10
+; GFX9-NEXT: s_mov_b32 s15, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 3, i32 1, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_u_2() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_u_2:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s14, s12
-; GFX900-NEXT: s_mov_b32 s15, s13
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_u_2:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s14, s12
-; GFX90A-NEXT: s_mov_b32 s15, s13
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_u_2:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+define void @s_shuffle_v4i64_v4i64__4_1_1_1() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__4_1_1_1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: s_mov_b32 s14, s10
+; GFX9-NEXT: s_mov_b32 s15, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 4, i32 1, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_0_2() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_0_2:
+define void @s_shuffle_v4i64_v4i64__5_1_1_1() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__5_1_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_0_2:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__5_1_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_0_2:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__5_1_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -18792,165 +15455,117 @@ define void @s_shuffle_v4i64_v4i64__7_7_0_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b32 s8, s2
+; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b32 s12, s10
+; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b32 s14, s10
+; GFX942-NEXT: s_mov_b32 s15, s11
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 5, i32 1, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_1_2() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_1_2:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_1_2:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_1_2:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+define void @s_shuffle_v4i64_v4i64__6_1_1_1() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__6_1_1_1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s14
+; GFX9-NEXT: s_mov_b32 s11, s15
+; GFX9-NEXT: s_mov_b32 s12, s14
+; GFX9-NEXT: s_mov_b32 s13, s15
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 6, i32 1, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_3_2() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_3_2:
+define void @s_shuffle_v4i64_v4i64__7_1_1_1() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_1_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s18
-; GFX900-NEXT: s_mov_b32 s13, s19
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_3_2:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_1_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s18
-; GFX90A-NEXT: s_mov_b32 s13, s19
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_3_2:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_1_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
-; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s12, s10
+; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b32 s14, s10
+; GFX942-NEXT: s_mov_b32 s15, s11
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 1, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_4_2() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_4_2:
+define void @s_shuffle_v4i64_v4i64__7_u_1_1() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_u_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -18961,16 +15576,14 @@ define void @s_shuffle_v4i64_v4i64__7_7_4_2() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_4_2:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_u_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -18981,16 +15594,14 @@ define void @s_shuffle_v4i64_v4i64__7_7_4_2() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_4_2:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_u_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -18999,25 +15610,23 @@ define void @s_shuffle_v4i64_v4i64__7_7_4_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s18
-; GFX942-NEXT: s_mov_b32 s11, s19
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s12, s14
+; GFX942-NEXT: s_mov_b32 s13, s15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 poison, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_5_2() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_5_2:
+define void @s_shuffle_v4i64_v4i64__7_0_1_1() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_0_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -19028,16 +15637,16 @@ define void @s_shuffle_v4i64_v4i64__7_7_5_2() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_5_2:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_0_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -19048,16 +15657,16 @@ define void @s_shuffle_v4i64_v4i64__7_7_5_2() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_5_2:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_0_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -19066,238 +15675,131 @@ define void @s_shuffle_v4i64_v4i64__7_7_5_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s18
-; GFX942-NEXT: s_mov_b32 s11, s19
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s10, s12
+; GFX942-NEXT: s_mov_b32 s11, s13
; GFX942-NEXT: s_mov_b32 s12, s14
; GFX942-NEXT: s_mov_b32 s13, s15
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 0, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_6_2() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_6_2:
+define void @s_shuffle_v4i64_v4i64__7_2_1_1() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_2_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_6_2:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_2_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_6_2:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_2_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s10, s16
+; GFX942-NEXT: s_mov_b32 s11, s17
+; GFX942-NEXT: s_mov_b32 s12, s14
+; GFX942-NEXT: s_mov_b32 s13, s15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 2>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__u_3_3_3() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__u_3_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 poison, i32 3, i32 3, i32 3>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__0_3_3_3() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__0_3_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 0, i32 3, i32 3, i32 3>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__1_3_3_3() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__1_3_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 1, i32 3, i32 3, i32 3>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__2_3_3_3() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__2_3_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 2, i32 3, i32 3, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 2, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__3_3_3_3() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__3_3_3_3:
+define void @s_shuffle_v4i64_v4i64__7_3_1_1() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_3_1_1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ; def s[4:11]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__4_3_3_3() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__4_3_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ; def s[12:19]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
+; GFX9-NEXT: s_mov_b32 s8, s18
+; GFX9-NEXT: s_mov_b32 s9, s19
+; GFX9-NEXT: s_mov_b32 s12, s6
+; GFX9-NEXT: s_mov_b32 s13, s7
+; GFX9-NEXT: s_mov_b32 s14, s6
+; GFX9-NEXT: s_mov_b32 s15, s7
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:15]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 4, i32 3, i32 3, i32 3>
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 3, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__5_3_3_3() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__5_3_3_3:
+define void @s_shuffle_v4i64_v4i64__7_4_1_1() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_4_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: s_mov_b32 s12, s14
; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
@@ -19305,19 +15807,19 @@ define void @s_shuffle_v4i64_v4i64__5_3_3_3() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__5_3_3_3:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_4_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: s_mov_b32 s12, s14
; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
@@ -19325,19 +15827,19 @@ define void @s_shuffle_v4i64_v4i64__5_3_3_3() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__5_3_3_3:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_4_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b32 s11, s1
; GFX942-NEXT: s_mov_b32 s12, s14
; GFX942-NEXT: s_mov_b32 s13, s15
; GFX942-NEXT: ;;#ASMSTART
@@ -19346,49 +15848,53 @@ define void @s_shuffle_v4i64_v4i64__5_3_3_3() {
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 5, i32 3, i32 3, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 4, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__6_3_3_3() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__6_3_3_3:
+define void @s_shuffle_v4i64_v4i64__7_5_1_1() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_5_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s14, s6
+; GFX900-NEXT: s_mov_b32 s15, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__6_3_3_3:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_5_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s14, s6
+; GFX90A-NEXT: s_mov_b32 s15, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__6_3_3_3:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_5_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -19397,37 +15903,37 @@ define void @s_shuffle_v4i64_v4i64__6_3_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b32 s8, s14
+; GFX942-NEXT: s_mov_b32 s9, s15
+; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b32 s14, s2
+; GFX942-NEXT: s_mov_b32 s15, s3
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 6, i32 3, i32 3, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 5, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_3_3_3() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_3_3_3:
+define void @s_shuffle_v4i64_v4i64__7_6_1_1() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_6_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[16:23]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s10, s20
+; GFX900-NEXT: s_mov_b32 s11, s21
; GFX900-NEXT: s_mov_b32 s12, s14
; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
@@ -19435,19 +15941,19 @@ define void @s_shuffle_v4i64_v4i64__7_3_3_3() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_3_3_3:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_6_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[16:23]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s10, s20
+; GFX90A-NEXT: s_mov_b32 s11, s21
; GFX90A-NEXT: s_mov_b32 s12, s14
; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
@@ -19455,19 +15961,19 @@ define void @s_shuffle_v4i64_v4i64__7_3_3_3() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_3_3_3:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_6_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_mov_b32 s8, s6
; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
+; GFX942-NEXT: s_mov_b32 s10, s4
+; GFX942-NEXT: s_mov_b32 s11, s5
; GFX942-NEXT: s_mov_b32 s12, s14
; GFX942-NEXT: s_mov_b32 s13, s15
; GFX942-NEXT: ;;#ASMSTART
@@ -19476,49 +15982,224 @@ define void @s_shuffle_v4i64_v4i64__7_3_3_3() {
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 3, i32 3, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 6, i32 1, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_7_1_1() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_7_1_1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s12, s14
+; GFX9-NEXT: s_mov_b32 s13, s15
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_7_u_1() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_7_u_1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_7_0_1() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_7_0_1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_7_2_1() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_7_2_1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s12, s16
+; GFX9-NEXT: s_mov_b32 s13, s17
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_7_3_1() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_7_3_1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s12, s18
+; GFX9-NEXT: s_mov_b32 s13, s19
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_7_4_1() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_7_4_1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s12, s4
+; GFX9-NEXT: s_mov_b32 s13, s5
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_7_5_1() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_7_5_1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s12, s6
+; GFX9-NEXT: s_mov_b32 s13, s7
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_u_3_3() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_u_3_3:
+define void @s_shuffle_v4i64_v4i64__7_7_6_1() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_6_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s14, s6
+; GFX900-NEXT: s_mov_b32 s15, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_u_3_3:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_6_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s14, s6
+; GFX90A-NEXT: s_mov_b32 s15, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_u_3_3:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_6_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -19527,67 +16208,197 @@ define void @s_shuffle_v4i64_v4i64__7_u_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b32 s8, s14
+; GFX942-NEXT: s_mov_b32 s9, s15
+; GFX942-NEXT: s_mov_b32 s10, s14
+; GFX942-NEXT: s_mov_b32 s11, s15
+; GFX942-NEXT: s_mov_b32 s14, s2
+; GFX942-NEXT: s_mov_b32 s15, s3
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 poison, i32 3, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_0_3_3() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_0_3_3:
+define void @s_shuffle_v4i64_v4i64__u_2_2_2() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__u_2_2_2:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: s_mov_b32 s14, s12
+; GFX9-NEXT: s_mov_b32 s15, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 poison, i32 2, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__0_2_2_2() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__0_2_2_2:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: s_mov_b32 s14, s12
+; GFX9-NEXT: s_mov_b32 s15, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__1_2_2_2() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__1_2_2_2:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: s_mov_b32 s14, s12
+; GFX9-NEXT: s_mov_b32 s15, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 1, i32 2, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__2_2_2_2() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__2_2_2_2:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s12, s8
+; GFX9-NEXT: s_mov_b32 s13, s9
+; GFX9-NEXT: s_mov_b32 s14, s8
+; GFX9-NEXT: s_mov_b32 s15, s9
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__3_2_2_2() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__3_2_2_2:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s14
+; GFX9-NEXT: s_mov_b32 s9, s15
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: s_mov_b32 s14, s12
+; GFX9-NEXT: s_mov_b32 s15, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__4_2_2_2() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__4_2_2_2:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: s_mov_b32 s14, s12
+; GFX9-NEXT: s_mov_b32 s15, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 4, i32 2, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__5_2_2_2() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__5_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: s_mov_b32 s10, s12
; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s18
-; GFX900-NEXT: s_mov_b32 s13, s19
-; GFX900-NEXT: s_mov_b32 s14, s18
-; GFX900-NEXT: s_mov_b32 s15, s19
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_0_3_3:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__5_2_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: s_mov_b32 s10, s12
; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s18
-; GFX90A-NEXT: s_mov_b32 s13, s19
-; GFX90A-NEXT: s_mov_b32 s14, s18
-; GFX90A-NEXT: s_mov_b32 s15, s19
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_0_3_3:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__5_2_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -19596,88 +16407,86 @@ define void @s_shuffle_v4i64_v4i64__7_0_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b32 s8, s2
+; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b32 s10, s12
+; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b32 s14, s12
+; GFX942-NEXT: s_mov_b32 s15, s13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 0, i32 3, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 5, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_1_3_3() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_1_3_3:
+define void @s_shuffle_v4i64_v4i64__6_2_2_2() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__6_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_1_3_3:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__6_2_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_1_3_3:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__6_2_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s14, s12
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b32 s10, s12
+; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b32 s15, s13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 1, i32 3, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 6, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_2_3_3() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_2_3_3:
+define void @s_shuffle_v4i64_v4i64__7_2_2_2() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -19690,14 +16499,14 @@ define void @s_shuffle_v4i64_v4i64__7_2_3_3() {
; GFX900-NEXT: s_mov_b32 s9, s11
; GFX900-NEXT: s_mov_b32 s10, s12
; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_2_3_3:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_2_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -19710,14 +16519,14 @@ define void @s_shuffle_v4i64_v4i64__7_2_3_3() {
; GFX90A-NEXT: s_mov_b32 s9, s11
; GFX90A-NEXT: s_mov_b32 s10, s12
; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_2_3_3:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_2_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -19730,21 +16539,21 @@ define void @s_shuffle_v4i64_v4i64__7_2_3_3() {
; GFX942-NEXT: s_mov_b32 s9, s7
; GFX942-NEXT: s_mov_b32 s10, s12
; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b32 s14, s12
+; GFX942-NEXT: s_mov_b32 s15, s13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 2, i32 3, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_4_3_3() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_4_3_3:
+define void @s_shuffle_v4i64_v4i64__7_u_2_2() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_u_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -19755,16 +16564,14 @@ define void @s_shuffle_v4i64_v4i64__7_4_3_3() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_4_3_3:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_u_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -19775,16 +16582,14 @@ define void @s_shuffle_v4i64_v4i64__7_4_3_3() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_4_3_3:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_u_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -19795,63 +16600,65 @@ define void @s_shuffle_v4i64_v4i64__7_4_3_3() {
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_mov_b32 s8, s6
; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b32 s14, s12
+; GFX942-NEXT: s_mov_b32 s15, s13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 4, i32 3, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 poison, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_5_3_3() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_5_3_3:
+define void @s_shuffle_v4i64_v4i64__7_0_2_2() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_0_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s12, s18
-; GFX900-NEXT: s_mov_b32 s13, s19
-; GFX900-NEXT: s_mov_b32 s14, s18
-; GFX900-NEXT: s_mov_b32 s15, s19
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_5_3_3:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_0_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s12, s18
-; GFX90A-NEXT: s_mov_b32 s13, s19
-; GFX90A-NEXT: s_mov_b32 s14, s18
-; GFX90A-NEXT: s_mov_b32 s15, s19
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_5_3_3:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_0_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -19862,23 +16669,25 @@ define void @s_shuffle_v4i64_v4i64__7_5_3_3() {
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_mov_b32 s8, s14
; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b32 s12, s4
+; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b32 s14, s4
+; GFX942-NEXT: s_mov_b32 s15, s5
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 5, i32 3, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 0, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_6_3_3() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_6_3_3:
+define void @s_shuffle_v4i64_v4i64__7_1_2_2() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_1_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -19889,16 +16698,14 @@ define void @s_shuffle_v4i64_v4i64__7_6_3_3() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s22
; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s10, s20
-; GFX900-NEXT: s_mov_b32 s11, s21
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_6_3_3:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_1_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -19909,16 +16716,14 @@ define void @s_shuffle_v4i64_v4i64__7_6_3_3() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s22
; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s10, s20
-; GFX90A-NEXT: s_mov_b32 s11, s21
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_6_3_3:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_1_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -19929,23 +16734,21 @@ define void @s_shuffle_v4i64_v4i64__7_6_3_3() {
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_mov_b32 s8, s6
; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b32 s14, s12
+; GFX942-NEXT: s_mov_b32 s15, s13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 6, i32 3, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 1, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_3_3() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_3_3:
+define void @s_shuffle_v4i64_v4i64__7_3_2_2() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_3_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -19956,14 +16759,16 @@ define void @s_shuffle_v4i64_v4i64__7_7_3_3() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_3_3:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_3_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -19974,14 +16779,16 @@ define void @s_shuffle_v4i64_v4i64__7_7_3_3() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_3_3:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_3_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -19992,23 +16799,23 @@ define void @s_shuffle_v4i64_v4i64__7_7_3_3() {
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_mov_b32 s8, s6
; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b32 s10, s14
+; GFX942-NEXT: s_mov_b32 s11, s15
+; GFX942-NEXT: s_mov_b32 s14, s12
+; GFX942-NEXT: s_mov_b32 s15, s13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 3, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_u_3() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_u_3:
+define void @s_shuffle_v4i64_v4i64__7_4_2_2() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_4_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -20019,12 +16826,16 @@ define void @s_shuffle_v4i64_v4i64__7_7_u_3() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_u_3:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_4_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -20035,12 +16846,16 @@ define void @s_shuffle_v4i64_v4i64__7_7_u_3() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_u_3:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_4_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -20051,126 +16866,63 @@ define void @s_shuffle_v4i64_v4i64__7_7_u_3() {
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_mov_b32 s8, s6
; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 3>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__7_7_0_3() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_0_3:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s14, s18
-; GFX900-NEXT: s_mov_b32 s15, s19
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_0_3:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s14, s18
-; GFX90A-NEXT: s_mov_b32 s15, s19
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_0_3:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b32 s14, s12
+; GFX942-NEXT: s_mov_b32 s15, s13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 4, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_1_3() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_1_3:
+define void @s_shuffle_v4i64_v4i64__7_5_2_2() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_5_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
-; GFX900-NEXT: s_mov_b32 s14, s18
-; GFX900-NEXT: s_mov_b32 s15, s19
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_1_3:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_5_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
-; GFX90A-NEXT: s_mov_b32 s14, s18
-; GFX90A-NEXT: s_mov_b32 s15, s19
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_1_3:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_5_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -20181,57 +16933,63 @@ define void @s_shuffle_v4i64_v4i64__7_7_1_3() {
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_mov_b32 s8, s14
; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b32 s12, s4
+; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b32 s14, s4
+; GFX942-NEXT: s_mov_b32 s15, s5
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 5, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_2_3() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_2_3:
+define void @s_shuffle_v4i64_v4i64__7_6_2_2() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_6_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[16:23]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s10, s20
+; GFX900-NEXT: s_mov_b32 s11, s21
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_2_3:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_6_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[16:23]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s10, s20
+; GFX90A-NEXT: s_mov_b32 s11, s21
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_2_3:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_6_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -20242,21 +17000,23 @@ define void @s_shuffle_v4i64_v4i64__7_7_2_3() {
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_mov_b32 s8, s6
; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b32 s10, s4
+; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b32 s14, s12
+; GFX942-NEXT: s_mov_b32 s15, s13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 6, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_4_3() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_4_3:
+define void @s_shuffle_v4i64_v4i64__7_7_2_2() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -20267,14 +17027,14 @@ define void @s_shuffle_v4i64_v4i64__7_7_4_3() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_4_3:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -20285,41 +17045,39 @@ define void @s_shuffle_v4i64_v4i64__7_7_4_3() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_4_3:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s14, s12
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b32 s8, s10
+; GFX942-NEXT: s_mov_b32 s9, s11
+; GFX942-NEXT: s_mov_b32 s15, s13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_5_3() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_5_3:
+define void @s_shuffle_v4i64_v4i64__7_7_u_2() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_u_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -20330,14 +17088,14 @@ define void @s_shuffle_v4i64_v4i64__7_7_5_3() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_5_3:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_u_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -20348,41 +17106,172 @@ define void @s_shuffle_v4i64_v4i64__7_7_5_3() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_5_3:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_u_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s14, s12
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b32 s8, s10
+; GFX942-NEXT: s_mov_b32 s9, s11
+; GFX942-NEXT: s_mov_b32 s15, s13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_6_3() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_6_3:
+define void @s_shuffle_v4i64_v4i64__7_7_0_2() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_7_0_2:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s14, s16
+; GFX9-NEXT: s_mov_b32 s15, s17
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_7_1_2() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_7_1_2:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s12, s14
+; GFX9-NEXT: s_mov_b32 s13, s15
+; GFX9-NEXT: s_mov_b32 s14, s16
+; GFX9-NEXT: s_mov_b32 s15, s17
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_7_3_2() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_7_3_2:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s12, s18
+; GFX9-NEXT: s_mov_b32 s13, s19
+; GFX9-NEXT: s_mov_b32 s14, s16
+; GFX9-NEXT: s_mov_b32 s15, s17
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_7_4_2() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_7_4_2:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s12, s4
+; GFX9-NEXT: s_mov_b32 s13, s5
+; GFX9-NEXT: s_mov_b32 s14, s16
+; GFX9-NEXT: s_mov_b32 s15, s17
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_7_5_2() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_7_5_2:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s12, s6
+; GFX9-NEXT: s_mov_b32 s13, s7
+; GFX9-NEXT: s_mov_b32 s14, s16
+; GFX9-NEXT: s_mov_b32 s15, s17
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_7_6_2() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_6_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -20395,14 +17284,14 @@ define void @s_shuffle_v4i64_v4i64__7_7_6_3() {
; GFX900-NEXT: s_mov_b32 s9, s15
; GFX900-NEXT: s_mov_b32 s10, s14
; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s14, s18
-; GFX900-NEXT: s_mov_b32 s15, s19
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_6_3:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_6_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -20415,14 +17304,14 @@ define void @s_shuffle_v4i64_v4i64__7_7_6_3() {
; GFX90A-NEXT: s_mov_b32 s9, s15
; GFX90A-NEXT: s_mov_b32 s10, s14
; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s14, s18
-; GFX90A-NEXT: s_mov_b32 s15, s19
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_6_3:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_6_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -20435,580 +17324,648 @@ define void @s_shuffle_v4i64_v4i64__7_7_6_3() {
; GFX942-NEXT: s_mov_b32 s9, s15
; GFX942-NEXT: s_mov_b32 s10, s14
; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b32 s14, s4
+; GFX942-NEXT: s_mov_b32 s15, s5
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__u_4_4_4() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__u_4_4_4:
+define void @s_shuffle_v4i64_v4i64__u_3_3_3() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__u_3_3_3:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: s_mov_b32 s14, s10
+; GFX9-NEXT: s_mov_b32 s15, s11
+; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:15]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 poison, i32 4, i32 4, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 poison, i32 3, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__0_4_4_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__0_4_4_4:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__0_4_4_4:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__0_4_4_4:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+define void @s_shuffle_v4i64_v4i64__0_3_3_3() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__0_3_3_3:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s14
+; GFX9-NEXT: s_mov_b32 s11, s15
+; GFX9-NEXT: s_mov_b32 s12, s14
+; GFX9-NEXT: s_mov_b32 s13, s15
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 0, i32 3, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__1_4_4_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__1_4_4_4:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__1_4_4_4:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__1_4_4_4:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+define void @s_shuffle_v4i64_v4i64__1_3_3_3() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__1_3_3_3:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s6
+; GFX9-NEXT: s_mov_b32 s9, s7
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: s_mov_b32 s14, s10
+; GFX9-NEXT: s_mov_b32 s15, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 1, i32 4, i32 4, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 1, i32 3, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__2_4_4_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__2_4_4_4:
+define void @s_shuffle_v4i64_v4i64__2_3_3_3() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__2_3_3_3:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: s_mov_b32 s14, s10
+; GFX9-NEXT: s_mov_b32 s15, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 2, i32 3, i32 3, i32 3>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__3_3_3_3() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__3_3_3_3:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: s_mov_b32 s14, s10
+; GFX9-NEXT: s_mov_b32 s15, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__4_3_3_3() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__4_3_3_3:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: s_mov_b32 s14, s10
+; GFX9-NEXT: s_mov_b32 s15, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 4, i32 3, i32 3, i32 3>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__5_3_3_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__5_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__2_4_4_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__5_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__2_4_4_4:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__5_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s12, s10
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b32 s8, s2
+; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b32 s14, s10
+; GFX942-NEXT: s_mov_b32 s15, s11
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 2, i32 4, i32 4, i32 4>
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 5, i32 3, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__3_4_4_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__3_4_4_4:
+define void @s_shuffle_v4i64_v4i64__6_3_3_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__6_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__3_4_4_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__6_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__3_4_4_4:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__6_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s12, s14
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s10, s14
+; GFX942-NEXT: s_mov_b32 s11, s15
+; GFX942-NEXT: s_mov_b32 s13, s15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 3, i32 4, i32 4, i32 4>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__4_4_4_4() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__4_4_4_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 4, i32 4, i32 4, i32 4>
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 6, i32 3, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__5_4_4_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__5_4_4_4:
+define void @s_shuffle_v4i64_v4i64__7_3_3_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__5_4_4_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__5_4_4_4:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s12, s10
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b32 s14, s10
+; GFX942-NEXT: s_mov_b32 s15, s11
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 5, i32 4, i32 4, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 3, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__6_4_4_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__6_4_4_4:
+define void @s_shuffle_v4i64_v4i64__7_u_3_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_u_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__6_4_4_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_u_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__6_4_4_4:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_u_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s12, s14
+; GFX942-NEXT: s_mov_b32 s13, s15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 6, i32 4, i32 4, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 poison, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_4_4_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_4_4_4:
+define void @s_shuffle_v4i64_v4i64__7_0_3_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_0_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s18
+; GFX900-NEXT: s_mov_b32 s13, s19
+; GFX900-NEXT: s_mov_b32 s14, s18
+; GFX900-NEXT: s_mov_b32 s15, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_4_4_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_0_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s18
+; GFX90A-NEXT: s_mov_b32 s13, s19
+; GFX90A-NEXT: s_mov_b32 s14, s18
+; GFX90A-NEXT: s_mov_b32 s15, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_4_4_4:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_0_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s8, s14
+; GFX942-NEXT: s_mov_b32 s9, s15
; GFX942-NEXT: s_mov_b32 s10, s0
; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b32 s12, s6
+; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b32 s14, s6
+; GFX942-NEXT: s_mov_b32 s15, s7
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 4, i32 4, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 0, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_u_4_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_u_4_4:
+define void @s_shuffle_v4i64_v4i64__7_1_3_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_1_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_u_4_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_1_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_u_4_4:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_1_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_mov_b32 s8, s6
; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b32 s12, s14
+; GFX942-NEXT: s_mov_b32 s13, s15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 poison, i32 4, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 1, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_0_4_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_0_4_4:
+define void @s_shuffle_v4i64_v4i64__7_2_3_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_2_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_0_4_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_2_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_0_4_4:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_2_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s10, s12
+; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b32 s12, s14
+; GFX942-NEXT: s_mov_b32 s13, s15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 0, i32 4, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 2, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_1_4_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_1_4_4:
+define void @s_shuffle_v4i64_v4i64__7_4_3_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_4_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_1_4_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_4_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_1_4_4:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_4_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -21019,779 +17976,738 @@ define void @s_shuffle_v4i64_v4i64__7_1_4_4() {
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_mov_b32 s8, s6
; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b32 s12, s14
+; GFX942-NEXT: s_mov_b32 s13, s15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 1, i32 4, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 4, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_2_4_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_2_4_4:
+define void @s_shuffle_v4i64_v4i64__7_5_3_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_5_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s18
+; GFX900-NEXT: s_mov_b32 s13, s19
+; GFX900-NEXT: s_mov_b32 s14, s18
+; GFX900-NEXT: s_mov_b32 s15, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_2_4_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_5_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s18
+; GFX90A-NEXT: s_mov_b32 s13, s19
+; GFX90A-NEXT: s_mov_b32 s14, s18
+; GFX90A-NEXT: s_mov_b32 s15, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_2_4_4:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_5_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[16:23]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s22
-; GFX942-NEXT: s_mov_b32 s9, s23
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s16
-; GFX942-NEXT: s_mov_b32 s13, s17
-; GFX942-NEXT: s_mov_b32 s14, s16
-; GFX942-NEXT: s_mov_b32 s15, s17
+; GFX942-NEXT: s_mov_b32 s8, s14
+; GFX942-NEXT: s_mov_b32 s9, s15
+; GFX942-NEXT: s_mov_b32 s12, s6
+; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b32 s14, s6
+; GFX942-NEXT: s_mov_b32 s15, s7
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 2, i32 4, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 5, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_3_4_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_3_4_4:
+define void @s_shuffle_v4i64_v4i64__7_6_3_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_6_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[16:23]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s22
; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s10, s20
+; GFX900-NEXT: s_mov_b32 s11, s21
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_3_4_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_6_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[16:23]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s22
; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s10, s20
+; GFX90A-NEXT: s_mov_b32 s11, s21
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_3_4_4:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_6_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[16:23]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s22
-; GFX942-NEXT: s_mov_b32 s9, s23
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s16
-; GFX942-NEXT: s_mov_b32 s13, s17
-; GFX942-NEXT: s_mov_b32 s14, s16
-; GFX942-NEXT: s_mov_b32 s15, s17
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s10, s4
+; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b32 s12, s14
+; GFX942-NEXT: s_mov_b32 s13, s15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 3, i32 4, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 6, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_5_4_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_5_4_4:
+define void @s_shuffle_v4i64_v4i64__7_7_3_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s6
-; GFX900-NEXT: s_mov_b32 s11, s7
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_5_4_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s6
-; GFX90A-NEXT: s_mov_b32 s11, s7
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_5_4_4:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b32 s12, s14
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s8, s10
+; GFX942-NEXT: s_mov_b32 s9, s11
+; GFX942-NEXT: s_mov_b32 s13, s15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 5, i32 4, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_6_4_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_6_4_4:
+define void @s_shuffle_v4i64_v4i64__7_7_u_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_u_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s10, s20
-; GFX900-NEXT: s_mov_b32 s11, s21
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_6_4_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_u_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s10, s20
-; GFX90A-NEXT: s_mov_b32 s11, s21
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_6_4_4:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_u_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s8, s10
+; GFX942-NEXT: s_mov_b32 s9, s11
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 6, i32 4, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_4_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_4_4:
+define void @s_shuffle_v4i64_v4i64__7_7_0_3() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_7_0_3:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s14, s18
+; GFX9-NEXT: s_mov_b32 s15, s19
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 3>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_7_1_3() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_7_1_3:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s12, s14
+; GFX9-NEXT: s_mov_b32 s13, s15
+; GFX9-NEXT: s_mov_b32 s14, s18
+; GFX9-NEXT: s_mov_b32 s15, s19
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 3>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_7_2_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_2_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_4_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_2_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_4_4:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_2_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s8, s10
+; GFX942-NEXT: s_mov_b32 s9, s11
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_u_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_u_4:
+define void @s_shuffle_v4i64_v4i64__7_7_4_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_4_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_u_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_4_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_u_4:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_4_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s8, s10
+; GFX942-NEXT: s_mov_b32 s9, s11
+; GFX942-NEXT: s_mov_b32 s12, s4
+; GFX942-NEXT: s_mov_b32 s13, s5
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_0_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_0_4:
+define void @s_shuffle_v4i64_v4i64__7_7_5_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_5_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s10, s22
-; GFX900-NEXT: s_mov_b32 s11, s23
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_0_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_5_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s10, s22
-; GFX90A-NEXT: s_mov_b32 s11, s23
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_0_4:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_5_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_mov_b32 s8, s10
; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b32 s12, s6
+; GFX942-NEXT: s_mov_b32 s13, s7
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_1_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_1_4:
+define void @s_shuffle_v4i64_v4i64__7_7_6_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_6_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s10, s22
-; GFX900-NEXT: s_mov_b32 s11, s23
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s14, s18
+; GFX900-NEXT: s_mov_b32 s15, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_1_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_6_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s10, s22
-; GFX90A-NEXT: s_mov_b32 s11, s23
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s14, s18
+; GFX90A-NEXT: s_mov_b32 s15, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_1_4:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_6_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b32 s8, s14
+; GFX942-NEXT: s_mov_b32 s9, s15
+; GFX942-NEXT: s_mov_b32 s10, s14
+; GFX942-NEXT: s_mov_b32 s11, s15
+; GFX942-NEXT: s_mov_b32 s14, s6
+; GFX942-NEXT: s_mov_b32 s15, s7
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 4>
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 3>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__u_4_4_4() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__u_4_4_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 poison, i32 4, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_2_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_2_4:
+define void @s_shuffle_v4i64_v4i64__0_4_4_4() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__0_4_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_2_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__0_4_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_2_4:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__0_4_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_3_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_3_4:
+define void @s_shuffle_v4i64_v4i64__1_4_4_4() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__1_4_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_3_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__1_4_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_3_4:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__1_4_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[16:23]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s22
-; GFX942-NEXT: s_mov_b32 s9, s23
-; GFX942-NEXT: s_mov_b32 s10, s22
-; GFX942-NEXT: s_mov_b32 s11, s23
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s16
-; GFX942-NEXT: s_mov_b32 s15, s17
+; GFX942-NEXT: s_mov_b32 s8, s2
+; GFX942-NEXT: s_mov_b32 s9, s3
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 1, i32 4, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_5_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_5_4:
+define void @s_shuffle_v4i64_v4i64__2_4_4_4() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__2_4_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_5_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__2_4_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_5_4:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__2_4_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 2, i32 4, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_6_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_6_4:
+define void @s_shuffle_v4i64_v4i64__3_4_4_4() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__3_4_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s10, s22
-; GFX900-NEXT: s_mov_b32 s11, s23
-; GFX900-NEXT: s_mov_b32 s12, s20
-; GFX900-NEXT: s_mov_b32 s13, s21
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_6_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__3_4_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s10, s22
-; GFX90A-NEXT: s_mov_b32 s11, s23
-; GFX90A-NEXT: s_mov_b32 s12, s20
-; GFX90A-NEXT: s_mov_b32 s13, s21
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_6_4:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__3_4_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -21799,533 +18715,772 @@ define void @s_shuffle_v4i64_v4i64__7_7_6_4() {
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_mov_b32 s8, s6
; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 3, i32 4, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__4_4_4_4() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__4_4_4_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 4, i32 4, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__5_4_4_4() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__5_4_4_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s14
+; GFX9-NEXT: s_mov_b32 s9, s15
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: s_mov_b32 s14, s12
+; GFX9-NEXT: s_mov_b32 s15, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 5, i32 4, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__u_5_5_5() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__u_5_5_5:
+define void @s_shuffle_v4i64_v4i64__6_4_4_4() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__6_4_4_4:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ; def s[4:11]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
+; GFX9-NEXT: s_mov_b32 s10, s4
+; GFX9-NEXT: s_mov_b32 s11, s5
+; GFX9-NEXT: s_mov_b32 s12, s4
+; GFX9-NEXT: s_mov_b32 s13, s5
+; GFX9-NEXT: s_mov_b32 s14, s4
+; GFX9-NEXT: s_mov_b32 s15, s5
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:15]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 poison, i32 5, i32 5, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 6, i32 4, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__0_5_5_5() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__0_5_5_5:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__0_5_5_5:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__0_5_5_5:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+define void @s_shuffle_v4i64_v4i64__7_4_4_4() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_4_4_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s18
+; GFX9-NEXT: s_mov_b32 s9, s19
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: s_mov_b32 s14, s12
+; GFX9-NEXT: s_mov_b32 s15, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 0, i32 5, i32 5, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 4, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__1_5_5_5() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__1_5_5_5:
+define void @s_shuffle_v4i64_v4i64__7_u_4_4() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_u_4_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s18
+; GFX9-NEXT: s_mov_b32 s9, s19
+; GFX9-NEXT: s_mov_b32 s14, s12
+; GFX9-NEXT: s_mov_b32 s15, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 poison, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_0_4_4() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_0_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s12, s10
-; GFX900-NEXT: s_mov_b32 s13, s11
-; GFX900-NEXT: s_mov_b32 s14, s10
-; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__1_5_5_5:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_0_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s12, s10
-; GFX90A-NEXT: s_mov_b32 s13, s11
-; GFX90A-NEXT: s_mov_b32 s14, s10
-; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__1_5_5_5:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_0_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
+; GFX942-NEXT: s_mov_b32 s8, s18
+; GFX942-NEXT: s_mov_b32 s9, s19
+; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b32 s14, s12
+; GFX942-NEXT: s_mov_b32 s15, s13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 1, i32 5, i32 5, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 0, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__2_5_5_5() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__2_5_5_5:
+define void @s_shuffle_v4i64_v4i64__7_1_4_4() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_1_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s12, s10
-; GFX900-NEXT: s_mov_b32 s13, s11
-; GFX900-NEXT: s_mov_b32 s14, s10
-; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__2_5_5_5:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_1_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s12, s10
-; GFX90A-NEXT: s_mov_b32 s13, s11
-; GFX90A-NEXT: s_mov_b32 s14, s10
-; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__2_5_5_5:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_1_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
+; GFX942-NEXT: s_mov_b32 s8, s18
+; GFX942-NEXT: s_mov_b32 s9, s19
+; GFX942-NEXT: s_mov_b32 s14, s12
+; GFX942-NEXT: s_mov_b32 s15, s13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 2, i32 5, i32 5, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 1, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__3_5_5_5() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__3_5_5_5:
+define void @s_shuffle_v4i64_v4i64__7_2_4_4() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_2_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[16:23]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s18
; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s12, s10
-; GFX900-NEXT: s_mov_b32 s13, s11
-; GFX900-NEXT: s_mov_b32 s14, s10
-; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: s_mov_b32 s10, s20
+; GFX900-NEXT: s_mov_b32 s11, s21
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__3_5_5_5:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_2_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[16:23]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s18
; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s12, s10
-; GFX90A-NEXT: s_mov_b32 s13, s11
-; GFX90A-NEXT: s_mov_b32 s14, s10
-; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: s_mov_b32 s10, s20
+; GFX90A-NEXT: s_mov_b32 s11, s21
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__3_5_5_5:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_2_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
+; GFX942-NEXT: s_mov_b32 s8, s18
+; GFX942-NEXT: s_mov_b32 s9, s19
+; GFX942-NEXT: s_mov_b32 s10, s4
+; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b32 s14, s12
+; GFX942-NEXT: s_mov_b32 s15, s13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 3, i32 5, i32 5, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 2, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__4_5_5_5() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__4_5_5_5:
+define void @s_shuffle_v4i64_v4i64__7_3_4_4() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_3_4_4:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ; def s[4:11]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s18
+; GFX9-NEXT: s_mov_b32 s9, s19
+; GFX9-NEXT: s_mov_b32 s14, s12
+; GFX9-NEXT: s_mov_b32 s15, s13
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:15]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 4, i32 5, i32 5, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 3, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__5_5_5_5() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__5_5_5_5:
+define void @s_shuffle_v4i64_v4i64__7_5_4_4() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_5_4_4:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s18
+; GFX9-NEXT: s_mov_b32 s9, s19
+; GFX9-NEXT: s_mov_b32 s10, s14
+; GFX9-NEXT: s_mov_b32 s11, s15
+; GFX9-NEXT: s_mov_b32 s14, s12
+; GFX9-NEXT: s_mov_b32 s15, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 5, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_6_4_4() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_6_4_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s18
+; GFX9-NEXT: s_mov_b32 s9, s19
+; GFX9-NEXT: s_mov_b32 s10, s16
+; GFX9-NEXT: s_mov_b32 s11, s17
+; GFX9-NEXT: s_mov_b32 s14, s12
+; GFX9-NEXT: s_mov_b32 s15, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 6, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_7_4_4() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_7_4_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_mov_b32 s8, s10
; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
+; GFX9-NEXT: s_mov_b32 s12, s4
+; GFX9-NEXT: s_mov_b32 s13, s5
+; GFX9-NEXT: s_mov_b32 s14, s4
+; GFX9-NEXT: s_mov_b32 s15, s5
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:15]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__6_5_5_5() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__6_5_5_5:
+define void @s_shuffle_v4i64_v4i64__7_7_u_4() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_7_u_4:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ; def s[4:11]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s14, s4
+; GFX9-NEXT: s_mov_b32 s15, s5
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:15]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 6, i32 5, i32 5, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_5_5_5() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_5_5_5:
+define void @s_shuffle_v4i64_v4i64__7_7_0_4() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_7_0_4:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ; def s[12:19]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s14, s4
+; GFX9-NEXT: s_mov_b32 s15, s5
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:15]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 5, i32 5, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_u_5_5() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_u_5_5:
+define void @s_shuffle_v4i64_v4i64__7_7_1_4() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_1_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s14, s4
+; GFX900-NEXT: s_mov_b32 s15, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_u_5_5:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_1_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s14, s4
+; GFX90A-NEXT: s_mov_b32 s15, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_u_5_5:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_1_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s8, s10
+; GFX942-NEXT: s_mov_b32 s9, s11
; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b32 s14, s4
+; GFX942-NEXT: s_mov_b32 s15, s5
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 poison, i32 5, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_0_5_5() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_0_5_5:
+define void @s_shuffle_v4i64_v4i64__7_7_2_4() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_2_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s14, s4
+; GFX900-NEXT: s_mov_b32 s15, s5
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_2_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s14, s4
+; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_2_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s8, s10
+; GFX942-NEXT: s_mov_b32 s9, s11
+; GFX942-NEXT: s_mov_b32 s14, s4
+; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_7_3_4() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_3_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
; GFX900-NEXT: s_mov_b32 s12, s14
; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s14, s4
+; GFX900-NEXT: s_mov_b32 s15, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_0_5_5:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_3_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
; GFX90A-NEXT: s_mov_b32 s12, s14
; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s14, s4
+; GFX90A-NEXT: s_mov_b32 s15, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_0_5_5:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_3_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: s_mov_b32 s12, s14
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_mov_b32 s8, s10
; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b32 s14, s4
+; GFX942-NEXT: s_mov_b32 s15, s5
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 0, i32 5, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_7_5_4() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_7_5_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s12, s6
+; GFX9-NEXT: s_mov_b32 s13, s7
+; GFX9-NEXT: s_mov_b32 s14, s4
+; GFX9-NEXT: s_mov_b32 s15, s5
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_7_6_4() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_6_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s10, s22
+; GFX900-NEXT: s_mov_b32 s11, s23
+; GFX900-NEXT: s_mov_b32 s12, s20
+; GFX900-NEXT: s_mov_b32 s13, s21
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_6_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s10, s22
+; GFX90A-NEXT: s_mov_b32 s11, s23
+; GFX90A-NEXT: s_mov_b32 s12, s20
+; GFX90A-NEXT: s_mov_b32 s13, s21
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_6_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s10, s6
+; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b32 s12, s4
+; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b32 s14, s0
+; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__u_5_5_5() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__u_5_5_5:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: s_mov_b32 s14, s10
+; GFX9-NEXT: s_mov_b32 s15, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 poison, i32 5, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_1_5_5() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_1_5_5:
+define void @s_shuffle_v4i64_v4i64__0_5_5_5() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__0_5_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -22334,8 +19489,8 @@ define void @s_shuffle_v4i64_v4i64__7_1_5_5() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
; GFX900-NEXT: s_mov_b32 s12, s14
; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
@@ -22343,7 +19498,7 @@ define void @s_shuffle_v4i64_v4i64__7_1_5_5() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_1_5_5:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__0_5_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -22352,8 +19507,8 @@ define void @s_shuffle_v4i64_v4i64__7_1_5_5() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
; GFX90A-NEXT: s_mov_b32 s12, s14
; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
@@ -22361,241 +19516,385 @@ define void @s_shuffle_v4i64_v4i64__7_1_5_5() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_1_5_5:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__0_5_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b32 s10, s14
+; GFX942-NEXT: s_mov_b32 s11, s15
+; GFX942-NEXT: s_mov_b32 s12, s14
+; GFX942-NEXT: s_mov_b32 s13, s15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 1, i32 5, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 0, i32 5, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_2_5_5() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_2_5_5:
+define void @s_shuffle_v4i64_v4i64__1_5_5_5() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__1_5_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_2_5_5:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__1_5_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_2_5_5:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__1_5_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b32 s8, s2
+; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b32 s12, s10
+; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b32 s14, s10
+; GFX942-NEXT: s_mov_b32 s15, s11
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 2, i32 5, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 1, i32 5, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_3_5_5() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_3_5_5:
+define void @s_shuffle_v4i64_v4i64__2_5_5_5() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__2_5_5_5:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s14
+; GFX9-NEXT: s_mov_b32 s11, s15
+; GFX9-NEXT: s_mov_b32 s12, s14
+; GFX9-NEXT: s_mov_b32 s13, s15
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 2, i32 5, i32 5, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__3_5_5_5() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__3_5_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s18
; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_3_5_5:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__3_5_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s18
; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_3_5_5:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__3_5_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s12, s10
+; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b32 s14, s10
+; GFX942-NEXT: s_mov_b32 s15, s11
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 3, i32 5, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 3, i32 5, i32 5, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__4_5_5_5() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__4_5_5_5:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: s_mov_b32 s14, s10
+; GFX9-NEXT: s_mov_b32 s15, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 4, i32 5, i32 5, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__5_5_5_5() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__5_5_5_5:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: s_mov_b32 s14, s10
+; GFX9-NEXT: s_mov_b32 s15, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__6_5_5_5() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__6_5_5_5:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s6
+; GFX9-NEXT: s_mov_b32 s11, s7
+; GFX9-NEXT: s_mov_b32 s12, s6
+; GFX9-NEXT: s_mov_b32 s13, s7
+; GFX9-NEXT: s_mov_b32 s14, s6
+; GFX9-NEXT: s_mov_b32 s15, s7
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 6, i32 5, i32 5, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_5_5_5() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_5_5_5:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s14
+; GFX9-NEXT: s_mov_b32 s9, s15
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: s_mov_b32 s14, s10
+; GFX9-NEXT: s_mov_b32 s15, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 5, i32 5, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_u_5_5() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_u_5_5:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s18
+; GFX9-NEXT: s_mov_b32 s9, s19
+; GFX9-NEXT: s_mov_b32 s12, s14
+; GFX9-NEXT: s_mov_b32 s13, s15
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 poison, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_4_5_5() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_4_5_5:
+define void @s_shuffle_v4i64_v4i64__7_0_5_5() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_0_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
; GFX900-NEXT: s_mov_b32 s10, s4
; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_4_5_5:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_0_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
; GFX90A-NEXT: s_mov_b32 s10, s4
; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_4_5_5:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_0_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s8, s18
+; GFX942-NEXT: s_mov_b32 s9, s19
; GFX942-NEXT: s_mov_b32 s10, s0
; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b32 s12, s14
+; GFX942-NEXT: s_mov_b32 s13, s15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 4, i32 5, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 0, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_6_5_5() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_6_5_5:
+define void @s_shuffle_v4i64_v4i64__7_1_5_5() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_1_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s18
; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
; GFX900-NEXT: s_mov_b32 s12, s14
; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
@@ -22603,16 +19902,17 @@ define void @s_shuffle_v4i64_v4i64__7_6_5_5() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_6_5_5:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_1_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s18
; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
; GFX90A-NEXT: s_mov_b32 s12, s14
; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
@@ -22620,138 +19920,210 @@ define void @s_shuffle_v4i64_v4i64__7_6_5_5() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_6_5_5:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_1_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s8, s18
+; GFX942-NEXT: s_mov_b32 s9, s19
+; GFX942-NEXT: s_mov_b32 s12, s14
+; GFX942-NEXT: s_mov_b32 s13, s15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 6, i32 5, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 1, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_5_5() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_5_5:
+define void @s_shuffle_v4i64_v4i64__7_2_5_5() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_2_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[16:23]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s10, s20
+; GFX900-NEXT: s_mov_b32 s11, s21
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_5_5:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_2_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[16:23]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s10, s20
+; GFX90A-NEXT: s_mov_b32 s11, s21
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_5_5:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_2_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b32 s8, s18
+; GFX942-NEXT: s_mov_b32 s9, s19
+; GFX942-NEXT: s_mov_b32 s10, s4
+; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b32 s12, s14
+; GFX942-NEXT: s_mov_b32 s13, s15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 2, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_u_5() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_u_5:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_u_5:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_u_5:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+define void @s_shuffle_v4i64_v4i64__7_3_5_5() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_3_5_5:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s18
+; GFX9-NEXT: s_mov_b32 s9, s19
+; GFX9-NEXT: s_mov_b32 s12, s14
+; GFX9-NEXT: s_mov_b32 s13, s15
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 3, i32 5, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_4_5_5() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_4_5_5:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s18
+; GFX9-NEXT: s_mov_b32 s9, s19
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: s_mov_b32 s12, s14
+; GFX9-NEXT: s_mov_b32 s13, s15
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 4, i32 5, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_6_5_5() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_6_5_5:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s18
+; GFX9-NEXT: s_mov_b32 s9, s19
+; GFX9-NEXT: s_mov_b32 s10, s16
+; GFX9-NEXT: s_mov_b32 s11, s17
+; GFX9-NEXT: s_mov_b32 s12, s14
+; GFX9-NEXT: s_mov_b32 s13, s15
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 6, i32 5, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_7_5_5() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_7_5_5:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s12, s6
+; GFX9-NEXT: s_mov_b32 s13, s7
+; GFX9-NEXT: s_mov_b32 s14, s6
+; GFX9-NEXT: s_mov_b32 s15, s7
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_7_u_5() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_7_u_5:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s14, s6
+; GFX9-NEXT: s_mov_b32 s15, s7
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 5>
@@ -22760,65 +20132,23 @@ define void @s_shuffle_v4i64_v4i64__7_7_u_5() {
}
define void @s_shuffle_v4i64_v4i64__7_7_0_5() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_0_5:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s18
-; GFX900-NEXT: s_mov_b32 s11, s19
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_0_5:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s18
-; GFX90A-NEXT: s_mov_b32 s11, s19
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_0_5:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_7_0_5:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s14, s6
+; GFX9-NEXT: s_mov_b32 s15, s7
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 5>
@@ -22831,17 +20161,17 @@ define void @s_shuffle_v4i64_v4i64__7_7_1_5() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s18
-; GFX900-NEXT: s_mov_b32 s11, s19
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s14, s6
+; GFX900-NEXT: s_mov_b32 s15, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -22851,17 +20181,17 @@ define void @s_shuffle_v4i64_v4i64__7_7_1_5() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s18
-; GFX90A-NEXT: s_mov_b32 s11, s19
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s14, s6
+; GFX90A-NEXT: s_mov_b32 s15, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -22936,15 +20266,14 @@ define void @s_shuffle_v4i64_v4i64__7_7_2_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b32 s8, s10
+; GFX942-NEXT: s_mov_b32 s9, s11
+; GFX942-NEXT: s_mov_b32 s14, s6
+; GFX942-NEXT: s_mov_b32 s15, s7
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -23001,17 +20330,17 @@ define void @s_shuffle_v4i64_v4i64__7_7_3_5() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s12, s14
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s18
-; GFX942-NEXT: s_mov_b32 s11, s19
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b32 s8, s10
+; GFX942-NEXT: s_mov_b32 s9, s11
+; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b32 s14, s6
+; GFX942-NEXT: s_mov_b32 s15, s7
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -23024,118 +20353,46 @@ define void @s_shuffle_v4i64_v4i64__7_7_3_5() {
}
define void @s_shuffle_v4i64_v4i64__7_7_4_5() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_4_5:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_4_5:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_4_5:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 5>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__7_7_6_5() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_6_5:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s18
-; GFX900-NEXT: s_mov_b32 s11, s19
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_6_5:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s18
-; GFX90A-NEXT: s_mov_b32 s11, s19
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_6_5:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_7_4_5:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s12, s4
+; GFX9-NEXT: s_mov_b32 s13, s5
+; GFX9-NEXT: s_mov_b32 s14, s6
+; GFX9-NEXT: s_mov_b32 s15, s7
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_7_6_5() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_7_6_5:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s18
+; GFX9-NEXT: s_mov_b32 s9, s19
+; GFX9-NEXT: s_mov_b32 s10, s18
+; GFX9-NEXT: s_mov_b32 s11, s19
+; GFX9-NEXT: s_mov_b32 s12, s16
+; GFX9-NEXT: s_mov_b32 s13, s17
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 5>
@@ -23304,17 +20561,17 @@ define void @s_shuffle_v4i64_v4i64__2_6_6_6() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s14, s12
-; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -23324,17 +20581,17 @@ define void @s_shuffle_v4i64_v4i64__2_6_6_6() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s14, s12
-; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -23344,17 +20601,18 @@ define void @s_shuffle_v4i64_v4i64__2_6_6_6() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b32 s10, s4
+; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b32 s12, s4
+; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b32 s14, s4
+; GFX942-NEXT: s_mov_b32 s15, s5
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -23484,14 +20742,14 @@ define void @s_shuffle_v4i64_v4i64__6_6_6_6() {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ; def s[4:11]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s12, s8
+; GFX9-NEXT: s_mov_b32 s13, s9
+; GFX9-NEXT: s_mov_b32 s14, s8
+; GFX9-NEXT: s_mov_b32 s15, s9
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:15]
; GFX9-NEXT: ;;#ASMEND
@@ -23755,17 +21013,17 @@ define void @s_shuffle_v4i64_v4i64__7_3_6_6() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s18
-; GFX900-NEXT: s_mov_b32 s11, s19
-; GFX900-NEXT: s_mov_b32 s14, s12
-; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -23775,17 +21033,17 @@ define void @s_shuffle_v4i64_v4i64__7_3_6_6() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s18
-; GFX90A-NEXT: s_mov_b32 s11, s19
-; GFX90A-NEXT: s_mov_b32 s14, s12
-; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -23795,17 +21053,18 @@ define void @s_shuffle_v4i64_v4i64__7_3_6_6() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s12, s4
+; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b32 s14, s4
+; GFX942-NEXT: s_mov_b32 s15, s5
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -23990,19 +21249,17 @@ define void @s_shuffle_v4i64_v4i64__7_7_0_6() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[16:23]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s18
-; GFX900-NEXT: s_mov_b32 s11, s19
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s10, s22
+; GFX900-NEXT: s_mov_b32 s11, s23
+; GFX900-NEXT: s_mov_b32 s14, s20
+; GFX900-NEXT: s_mov_b32 s15, s21
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -24012,19 +21269,17 @@ define void @s_shuffle_v4i64_v4i64__7_7_0_6() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[16:23]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s18
-; GFX90A-NEXT: s_mov_b32 s11, s19
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s10, s22
+; GFX90A-NEXT: s_mov_b32 s11, s23
+; GFX90A-NEXT: s_mov_b32 s14, s20
+; GFX90A-NEXT: s_mov_b32 s15, s21
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -24039,14 +21294,12 @@ define void @s_shuffle_v4i64_v4i64__7_7_0_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s18
-; GFX942-NEXT: s_mov_b32 s11, s19
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s16
-; GFX942-NEXT: s_mov_b32 s15, s17
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s10, s6
+; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b32 s14, s4
+; GFX942-NEXT: s_mov_b32 s15, s5
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -24272,58 +21525,22 @@ define void @s_shuffle_v4i64_v4i64__7_7_3_6() {
}
define void @s_shuffle_v4i64_v4i64__7_7_4_6() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_4_6:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s18
-; GFX900-NEXT: s_mov_b32 s11, s19
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_4_6:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s18
-; GFX90A-NEXT: s_mov_b32 s11, s19
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_4_6:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_7_4_6:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s18
+; GFX9-NEXT: s_mov_b32 s9, s19
+; GFX9-NEXT: s_mov_b32 s10, s18
+; GFX9-NEXT: s_mov_b32 s11, s19
+; GFX9-NEXT: s_mov_b32 s14, s16
+; GFX9-NEXT: s_mov_b32 s15, s17
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 6>
@@ -24400,12 +21617,12 @@ define void @s_shuffle_v4i64_v4i64__u_7_7_7() {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ; def s[4:11]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: s_mov_b32 s14, s10
+; GFX9-NEXT: s_mov_b32 s15, s11
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:15]
; GFX9-NEXT: ;;#ASMEND
@@ -24489,17 +21706,17 @@ define void @s_shuffle_v4i64_v4i64__1_7_7_7() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -24509,17 +21726,17 @@ define void @s_shuffle_v4i64_v4i64__1_7_7_7() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -24529,17 +21746,18 @@ define void @s_shuffle_v4i64_v4i64__1_7_7_7() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_mov_b32 s8, s2
; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b32 s12, s10
+; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b32 s14, s10
+; GFX942-NEXT: s_mov_b32 s15, s11
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -24556,17 +21774,17 @@ define void @s_shuffle_v4i64_v4i64__2_7_7_7() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s10, s18
+; GFX900-NEXT: s_mov_b32 s11, s19
+; GFX900-NEXT: s_mov_b32 s12, s18
+; GFX900-NEXT: s_mov_b32 s13, s19
+; GFX900-NEXT: s_mov_b32 s14, s18
+; GFX900-NEXT: s_mov_b32 s15, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -24576,17 +21794,17 @@ define void @s_shuffle_v4i64_v4i64__2_7_7_7() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s10, s18
+; GFX90A-NEXT: s_mov_b32 s11, s19
+; GFX90A-NEXT: s_mov_b32 s12, s18
+; GFX90A-NEXT: s_mov_b32 s13, s19
+; GFX90A-NEXT: s_mov_b32 s14, s18
+; GFX90A-NEXT: s_mov_b32 s15, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -24596,17 +21814,18 @@ define void @s_shuffle_v4i64_v4i64__2_7_7_7() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b32 s10, s6
+; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b32 s12, s6
+; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b32 s14, s6
+; GFX942-NEXT: s_mov_b32 s15, s7
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -24623,17 +21842,17 @@ define void @s_shuffle_v4i64_v4i64__3_7_7_7() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -24643,17 +21862,17 @@ define void @s_shuffle_v4i64_v4i64__3_7_7_7() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -24665,15 +21884,16 @@ define void @s_shuffle_v4i64_v4i64__3_7_7_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b32 s8, s14
+; GFX942-NEXT: s_mov_b32 s9, s15
+; GFX942-NEXT: s_mov_b32 s12, s10
+; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b32 s14, s10
+; GFX942-NEXT: s_mov_b32 s15, s11
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -24712,14 +21932,14 @@ define void @s_shuffle_v4i64_v4i64__5_7_7_7() {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ; def s[4:11]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
+; GFX9-NEXT: s_mov_b32 s8, s6
+; GFX9-NEXT: s_mov_b32 s9, s7
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: s_mov_b32 s14, s10
+; GFX9-NEXT: s_mov_b32 s15, s11
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:15]
; GFX9-NEXT: ;;#ASMEND
@@ -24736,14 +21956,12 @@ define void @s_shuffle_v4i64_v4i64__6_7_7_7() {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ; def s[4:11]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: s_mov_b32 s14, s10
+; GFX9-NEXT: s_mov_b32 s15, s11
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:15]
; GFX9-NEXT: ;;#ASMEND
@@ -24983,17 +22201,17 @@ define void @s_shuffle_v4i64_v4i64__7_3_7_7() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s18
-; GFX900-NEXT: s_mov_b32 s11, s19
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s12, s18
+; GFX900-NEXT: s_mov_b32 s13, s19
+; GFX900-NEXT: s_mov_b32 s14, s18
+; GFX900-NEXT: s_mov_b32 s15, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -25003,17 +22221,17 @@ define void @s_shuffle_v4i64_v4i64__7_3_7_7() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s18
-; GFX90A-NEXT: s_mov_b32 s11, s19
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s12, s18
+; GFX90A-NEXT: s_mov_b32 s13, s19
+; GFX90A-NEXT: s_mov_b32 s14, s18
+; GFX90A-NEXT: s_mov_b32 s15, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -25023,17 +22241,18 @@ define void @s_shuffle_v4i64_v4i64__7_3_7_7() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s12, s6
+; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b32 s14, s6
+; GFX942-NEXT: s_mov_b32 s15, s7
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -25160,12 +22379,12 @@ define void @s_shuffle_v4i64_v4i64__7_7_u_7() {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ; def s[4:11]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s14, s10
+; GFX9-NEXT: s_mov_b32 s15, s11
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:15]
; GFX9-NEXT: ;;#ASMEND
@@ -25178,65 +22397,23 @@ define void @s_shuffle_v4i64_v4i64__7_7_u_7() {
}
define void @s_shuffle_v4i64_v4i64__7_7_0_7() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_0_7:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_0_7:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_0_7:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_7_0_7:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s14, s10
+; GFX9-NEXT: s_mov_b32 s15, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 7>
@@ -25249,17 +22426,17 @@ define void @s_shuffle_v4i64_v4i64__7_7_1_7() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -25269,17 +22446,17 @@ define void @s_shuffle_v4i64_v4i64__7_7_1_7() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -25289,17 +22466,17 @@ define void @s_shuffle_v4i64_v4i64__7_7_1_7() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s8, s10
+; GFX942-NEXT: s_mov_b32 s9, s11
; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b32 s14, s10
+; GFX942-NEXT: s_mov_b32 s15, s11
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -25319,14 +22496,12 @@ define void @s_shuffle_v4i64_v4i64__7_7_2_7() {
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s10, s22
-; GFX900-NEXT: s_mov_b32 s11, s23
-; GFX900-NEXT: s_mov_b32 s14, s22
-; GFX900-NEXT: s_mov_b32 s15, s23
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -25339,14 +22514,12 @@ define void @s_shuffle_v4i64_v4i64__7_7_2_7() {
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s10, s22
-; GFX90A-NEXT: s_mov_b32 s11, s23
-; GFX90A-NEXT: s_mov_b32 s14, s22
-; GFX90A-NEXT: s_mov_b32 s15, s23
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -25358,15 +22531,14 @@ define void @s_shuffle_v4i64_v4i64__7_7_2_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b32 s8, s10
+; GFX942-NEXT: s_mov_b32 s9, s11
+; GFX942-NEXT: s_mov_b32 s14, s10
+; GFX942-NEXT: s_mov_b32 s15, s11
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -25383,17 +22555,17 @@ define void @s_shuffle_v4i64_v4i64__7_7_3_7() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s18
-; GFX900-NEXT: s_mov_b32 s13, s19
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -25403,17 +22575,17 @@ define void @s_shuffle_v4i64_v4i64__7_7_3_7() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s18
-; GFX90A-NEXT: s_mov_b32 s13, s19
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -25425,15 +22597,15 @@ define void @s_shuffle_v4i64_v4i64__7_7_3_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s12, s14
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b32 s8, s10
+; GFX942-NEXT: s_mov_b32 s9, s11
+; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b32 s14, s10
+; GFX942-NEXT: s_mov_b32 s15, s11
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -25446,58 +22618,22 @@ define void @s_shuffle_v4i64_v4i64__7_7_3_7() {
}
define void @s_shuffle_v4i64_v4i64__7_7_4_7() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_4_7:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s18
-; GFX900-NEXT: s_mov_b32 s11, s19
-; GFX900-NEXT: s_mov_b32 s14, s18
-; GFX900-NEXT: s_mov_b32 s15, s19
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_4_7:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s18
-; GFX90A-NEXT: s_mov_b32 s11, s19
-; GFX90A-NEXT: s_mov_b32 s14, s18
-; GFX90A-NEXT: s_mov_b32 s15, s19
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_4_7:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_7_4_7:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s12, s4
+; GFX9-NEXT: s_mov_b32 s13, s5
+; GFX9-NEXT: s_mov_b32 s14, s10
+; GFX9-NEXT: s_mov_b32 s15, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 7>
@@ -25506,62 +22642,22 @@ define void @s_shuffle_v4i64_v4i64__7_7_4_7() {
}
define void @s_shuffle_v4i64_v4i64__7_7_5_7() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_5_7:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s18
-; GFX900-NEXT: s_mov_b32 s11, s19
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
-; GFX900-NEXT: s_mov_b32 s14, s18
-; GFX900-NEXT: s_mov_b32 s15, s19
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_5_7:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s18
-; GFX90A-NEXT: s_mov_b32 s11, s19
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
-; GFX90A-NEXT: s_mov_b32 s14, s18
-; GFX90A-NEXT: s_mov_b32 s15, s19
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_5_7:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_7_5_7:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s12, s6
+; GFX9-NEXT: s_mov_b32 s13, s7
+; GFX9-NEXT: s_mov_b32 s14, s10
+; GFX9-NEXT: s_mov_b32 s15, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 7>
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v2p0.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v2p0.ll
index ee3b303f88471..8ffb3615940bd 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v2p0.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v2p0.ll
@@ -58,39 +58,33 @@ define void @v_shuffle_v4p0_v2p0__1_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v2p0__1_u_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v2p0__1_u_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v2p0__1_u_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -114,39 +108,33 @@ define void @v_shuffle_v4p0_v2p0__3_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v2p0__3_u_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v2p0__3_u_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_u_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -160,55 +148,42 @@ define void @v_shuffle_v4p0_v2p0__3_0_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v2p0__3_0_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v2p0__3_0_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_0_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -222,49 +197,43 @@ define void @v_shuffle_v4p0_v2p0__3_1_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v2p0__3_1_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v2p0__3_1_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_1_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -291,31 +260,27 @@ define void @v_shuffle_v4p0_v2p0__3_2_u_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-LABEL: v_shuffle_v4p0_v2p0__3_2_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_2_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -329,39 +294,40 @@ define void @v_shuffle_v4p0_v2p0__3_3_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v2p0__3_3_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v2p0__3_3_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_3_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -375,54 +341,54 @@ define void @v_shuffle_v4p0_v2p0__3_3_0_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v2p0__3_3_0_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v2p0__3_3_0_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_3_0_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -436,57 +402,51 @@ define void @v_shuffle_v4p0_v2p0__3_3_1_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v2p0__3_3_1_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v2p0__3_3_1_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_3_1_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -500,45 +460,42 @@ define void @v_shuffle_v4p0_v2p0__3_3_2_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v2p0__3_3_2_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v2p0__3_3_2_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_3_2_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -552,44 +509,47 @@ define void @v_shuffle_v4p0_v2p0__3_3_3_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v2p0__3_3_3_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v2p0__3_3_3_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_3_3_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
-; GFX942-NEXT: s_waitcnt vmcnt(0)
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: s_waitcnt vmcnt(0)
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
%vec1 = call <2 x ptr> asm "; def $0", "=v"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 3, i32 3, i32 poison>
@@ -601,63 +561,54 @@ define void @v_shuffle_v4p0_v2p0__3_3_3_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v2p0__3_3_3_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v4
-; GFX900-NEXT: v_mov_b32_e32 v7, v5
-; GFX900-NEXT: v_mov_b32_e32 v8, v0
-; GFX900-NEXT: v_mov_b32_e32 v9, v1
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v2p0__3_3_3_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v4
-; GFX90A-NEXT: v_mov_b32_e32 v9, v5
-; GFX90A-NEXT: v_mov_b32_e32 v10, v0
-; GFX90A-NEXT: v_mov_b32_e32 v11, v1
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[8:11], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_3_3_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, v0
-; GFX942-NEXT: v_mov_b32_e32 v8, v4
-; GFX942-NEXT: v_mov_b32_e32 v9, v5
-; GFX942-NEXT: v_mov_b32_e32 v11, v1
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[8:11], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -671,57 +622,55 @@ define void @v_shuffle_v4p0_v2p0__3_3_3_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v2p0__3_3_3_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v2p0__3_3_3_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_3_3_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -735,54 +684,52 @@ define void @v_shuffle_v4p0_v2p0__3_3_3_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v2p0__3_3_3_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
; GFX900-NEXT: v_mov_b32_e32 v4, v2
; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: v_mov_b32_e32 v6, v0
-; GFX900-NEXT: v_mov_b32_e32 v7, v1
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v2p0__3_3_3_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v1
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_3_3_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -796,42 +743,43 @@ define void @v_shuffle_v4p0_v2p0__3_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v2p0__3_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v2p0__3_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -846,13 +794,13 @@ define void @v_shuffle_v4p0_v2p0__u_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v0, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -860,13 +808,13 @@ define void @v_shuffle_v4p0_v2p0__u_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v0, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -874,13 +822,13 @@ define void @v_shuffle_v4p0_v2p0__u_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v0, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v0, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -943,53 +891,47 @@ define void @v_shuffle_v4p0_v2p0__1_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: v_mov_b32_e32 v6, v0
-; GFX900-NEXT: v_mov_b32_e32 v7, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, v2
+; GFX900-NEXT: v_mov_b32_e32 v1, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: v_mov_b32_e32 v7, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v2p0__1_0_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: v_mov_b32_e32 v7, v1
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v2
+; GFX90A-NEXT: v_mov_b32_e32 v1, v3
; GFX90A-NEXT: v_mov_b32_e32 v6, v2
; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v2p0__1_0_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v0, v2
+; GFX942-NEXT: v_mov_b32_e32 v1, v3
; GFX942-NEXT: v_mov_b32_e32 v6, v2
; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -1003,13 +945,13 @@ define void @v_shuffle_v4p0_v2p0__2_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v0, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1017,13 +959,13 @@ define void @v_shuffle_v4p0_v2p0__2_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v0, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1031,13 +973,13 @@ define void @v_shuffle_v4p0_v2p0__2_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v0, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v0, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -1051,21 +993,16 @@ define void @v_shuffle_v4p0_v2p0__3_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1073,21 +1010,16 @@ define void @v_shuffle_v4p0_v2p0__3_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1095,22 +1027,16 @@ define void @v_shuffle_v4p0_v2p0__3_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -1125,19 +1051,16 @@ define void @v_shuffle_v4p0_v2p0__3_u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1145,19 +1068,16 @@ define void @v_shuffle_v4p0_v2p0__3_u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1165,20 +1085,16 @@ define void @v_shuffle_v4p0_v2p0__3_u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -1192,63 +1108,58 @@ define void @v_shuffle_v4p0_v2p0__3_1_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v2p0__3_1_0_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v0, v2
+; GFX900-NEXT: v_mov_b32_e32 v1, v3
+; GFX900-NEXT: v_mov_b32_e32 v10, v4
+; GFX900-NEXT: v_mov_b32_e32 v11, v5
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[6:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, v0
-; GFX900-NEXT: v_mov_b32_e32 v9, v1
-; GFX900-NEXT: v_mov_b32_e32 v10, v0
-; GFX900-NEXT: v_mov_b32_e32 v11, v1
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v2p0__3_1_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v2
+; GFX90A-NEXT: v_mov_b32_e32 v1, v3
+; GFX90A-NEXT: v_mov_b32_e32 v10, v4
+; GFX90A-NEXT: v_mov_b32_e32 v11, v5
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[6:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, v0
-; GFX90A-NEXT: v_mov_b32_e32 v11, v1
-; GFX90A-NEXT: v_mov_b32_e32 v12, v0
-; GFX90A-NEXT: v_mov_b32_e32 v13, v1
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[10:13], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_1_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[6:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, v0
-; GFX942-NEXT: v_mov_b32_e32 v11, v1
-; GFX942-NEXT: v_mov_b32_e32 v12, v0
-; GFX942-NEXT: v_mov_b32_e32 v13, v1
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[10:13], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v2
+; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: v_mov_b32_e32 v10, v4
+; GFX942-NEXT: v_mov_b32_e32 v11, v5
+; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -1263,21 +1174,19 @@ define void @v_shuffle_v4p0_v2p0__3_2_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1285,21 +1194,19 @@ define void @v_shuffle_v4p0_v2p0__3_2_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1307,21 +1214,19 @@ define void @v_shuffle_v4p0_v2p0__3_2_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -1336,19 +1241,19 @@ define void @v_shuffle_v4p0_v2p0__3_3_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1356,19 +1261,19 @@ define void @v_shuffle_v4p0_v2p0__3_3_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1376,20 +1281,19 @@ define void @v_shuffle_v4p0_v2p0__3_3_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -1404,19 +1308,16 @@ define void @v_shuffle_v4p0_v2p0__3_3_u_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v0, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1424,19 +1325,16 @@ define void @v_shuffle_v4p0_v2p0__3_3_u_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v0, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1444,20 +1342,16 @@ define void @v_shuffle_v4p0_v2p0__3_3_u_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v0, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v0, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -1474,17 +1368,16 @@ define void @v_shuffle_v4p0_v2p0__3_3_1_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v4, v0
; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1494,17 +1387,16 @@ define void @v_shuffle_v4p0_v2p0__3_3_1_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v4, v0
; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1514,18 +1406,16 @@ define void @v_shuffle_v4p0_v2p0__3_3_1_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
; GFX942-NEXT: v_mov_b32_e32 v4, v0
; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -1540,20 +1430,18 @@ define void @v_shuffle_v4p0_v2p0__3_3_2_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v2
-; GFX900-NEXT: v_mov_b32_e32 v7, v3
-; GFX900-NEXT: v_mov_b32_e32 v8, v0
-; GFX900-NEXT: v_mov_b32_e32 v9, v1
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v0, v4
+; GFX900-NEXT: v_mov_b32_e32 v1, v5
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1561,20 +1449,18 @@ define void @v_shuffle_v4p0_v2p0__3_3_2_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v2
-; GFX90A-NEXT: v_mov_b32_e32 v9, v3
-; GFX90A-NEXT: v_mov_b32_e32 v10, v0
-; GFX90A-NEXT: v_mov_b32_e32 v11, v1
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[8:11], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v4
+; GFX90A-NEXT: v_mov_b32_e32 v1, v5
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1582,20 +1468,19 @@ define void @v_shuffle_v4p0_v2p0__3_3_2_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, v0
-; GFX942-NEXT: v_mov_b32_e32 v8, v2
-; GFX942-NEXT: v_mov_b32_e32 v9, v3
-; GFX942-NEXT: v_mov_b32_e32 v11, v1
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[8:11], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v4
+; GFX942-NEXT: v_mov_b32_e32 v1, v5
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -1609,42 +1494,43 @@ define void @v_shuffle_v4p0_v2p0__u_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v2p0__u_1_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v2p0__u_1_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v2p0__u_1_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -1657,49 +1543,43 @@ define void @v_shuffle_v4p0_v2p0__0_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v2p0__0_1_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v4, v2
; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: v_mov_b32_e32 v6, v2
-; GFX900-NEXT: v_mov_b32_e32 v7, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v2p0__0_1_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: v_mov_b32_e32 v8, v2
-; GFX90A-NEXT: v_mov_b32_e32 v9, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v2p0__0_1_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: v_mov_b32_e32 v8, v2
-; GFX942-NEXT: v_mov_b32_e32 v9, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -1712,42 +1592,43 @@ define void @v_shuffle_v4p0_v2p0__1_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v2p0__1_1_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v2p0__1_1_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v2p0__1_1_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -1760,42 +1641,43 @@ define void @v_shuffle_v4p0_v2p0__2_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v2p0__2_1_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v2p0__2_1_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v2p0__2_1_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -1809,19 +1691,16 @@ define void @v_shuffle_v4p0_v2p0__3_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1829,19 +1708,16 @@ define void @v_shuffle_v4p0_v2p0__3_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1849,19 +1725,16 @@ define void @v_shuffle_v4p0_v2p0__3_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -1876,19 +1749,16 @@ define void @v_shuffle_v4p0_v2p0__3_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1896,19 +1766,16 @@ define void @v_shuffle_v4p0_v2p0__3_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1916,19 +1783,17 @@ define void @v_shuffle_v4p0_v2p0__3_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -1942,69 +1807,52 @@ define void @v_shuffle_v4p0_v2p0__3_0_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v2p0__3_0_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v8, v2
-; GFX900-NEXT: v_mov_b32_e32 v9, v3
-; GFX900-NEXT: v_mov_b32_e32 v10, v2
-; GFX900-NEXT: v_mov_b32_e32 v11, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v2p0__3_0_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v10, v2
-; GFX90A-NEXT: v_mov_b32_e32 v11, v3
-; GFX90A-NEXT: v_mov_b32_e32 v12, v2
-; GFX90A-NEXT: v_mov_b32_e32 v13, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[10:13], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_0_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v2
-; GFX942-NEXT: v_mov_b32_e32 v11, v3
-; GFX942-NEXT: v_mov_b32_e32 v12, v2
-; GFX942-NEXT: v_mov_b32_e32 v13, v3
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[10:13], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -2018,66 +1866,58 @@ define void @v_shuffle_v4p0_v2p0__3_2_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v2p0__3_2_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[6:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: v_mov_b32_e32 v10, v6
+; GFX900-NEXT: v_mov_b32_e32 v11, v7
+; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v2p0__3_2_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[6:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: v_mov_b32_e32 v10, v6
+; GFX90A-NEXT: v_mov_b32_e32 v11, v7
+; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_2_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[6:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: v_mov_b32_e32 v10, v6
+; GFX942-NEXT: v_mov_b32_e32 v11, v7
+; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -2092,18 +1932,18 @@ define void @v_shuffle_v4p0_v2p0__3_3_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2111,18 +1951,18 @@ define void @v_shuffle_v4p0_v2p0__3_3_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2130,18 +1970,18 @@ define void @v_shuffle_v4p0_v2p0__3_3_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -2155,51 +1995,55 @@ define void @v_shuffle_v4p0_v2p0__3_3_u_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v2p0__3_3_u_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v2p0__3_3_u_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_3_u_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -2213,51 +2057,54 @@ define void @v_shuffle_v4p0_v2p0__3_3_0_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v2p0__3_3_0_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v2p0__3_3_0_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_3_0_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -2272,18 +2119,19 @@ define void @v_shuffle_v4p0_v2p0__3_3_2_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2291,18 +2139,19 @@ define void @v_shuffle_v4p0_v2p0__3_3_2_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2310,18 +2159,20 @@ define void @v_shuffle_v4p0_v2p0__3_3_2_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -2385,39 +2236,33 @@ define void @v_shuffle_v4p0_v2p0__1_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v2p0__1_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v2p0__1_2_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v2p0__1_2_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -2443,53 +2288,47 @@ define void @v_shuffle_v4p0_v2p0__3_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: v_mov_b32_e32 v6, v0
-; GFX900-NEXT: v_mov_b32_e32 v7, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, v2
+; GFX900-NEXT: v_mov_b32_e32 v1, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: v_mov_b32_e32 v7, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v2p0__3_2_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: v_mov_b32_e32 v7, v1
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v2
+; GFX90A-NEXT: v_mov_b32_e32 v1, v3
; GFX90A-NEXT: v_mov_b32_e32 v6, v2
; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_2_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v0, v2
+; GFX942-NEXT: v_mov_b32_e32 v1, v3
; GFX942-NEXT: v_mov_b32_e32 v6, v2
; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -2503,54 +2342,43 @@ define void @v_shuffle_v4p0_v2p0__3_u_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v2p0__3_u_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: v_mov_b32_e32 v6, v0
-; GFX900-NEXT: v_mov_b32_e32 v7, v1
; GFX900-NEXT: v_mov_b32_e32 v0, v2
; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v2p0__3_u_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: v_mov_b32_e32 v7, v1
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v1
; GFX90A-NEXT: v_mov_b32_e32 v0, v2
; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_u_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_mov_b32_e32 v0, v2
; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v6, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -2564,70 +2392,52 @@ define void @v_shuffle_v4p0_v2p0__3_0_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v2p0__3_0_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v0, v2
+; GFX900-NEXT: v_mov_b32_e32 v1, v3
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[6:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v2
-; GFX900-NEXT: v_mov_b32_e32 v7, v3
-; GFX900-NEXT: v_mov_b32_e32 v8, v2
-; GFX900-NEXT: v_mov_b32_e32 v9, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v2p0__3_0_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v0, v2
+; GFX90A-NEXT: v_mov_b32_e32 v1, v3
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[6:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v2
-; GFX90A-NEXT: v_mov_b32_e32 v9, v3
-; GFX90A-NEXT: v_mov_b32_e32 v10, v2
-; GFX90A-NEXT: v_mov_b32_e32 v11, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[8:11], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_0_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[6:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v2
-; GFX942-NEXT: v_mov_b32_e32 v9, v3
-; GFX942-NEXT: v_mov_b32_e32 v10, v2
-; GFX942-NEXT: v_mov_b32_e32 v11, v3
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[8:11], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v2
+; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -2642,20 +2452,16 @@ define void @v_shuffle_v4p0_v2p0__3_1_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, v4
-; GFX900-NEXT: v_mov_b32_e32 v9, v5
-; GFX900-NEXT: v_mov_b32_e32 v10, v4
-; GFX900-NEXT: v_mov_b32_e32 v11, v5
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, v2
+; GFX900-NEXT: v_mov_b32_e32 v1, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2663,20 +2469,16 @@ define void @v_shuffle_v4p0_v2p0__3_1_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, v4
-; GFX90A-NEXT: v_mov_b32_e32 v11, v5
-; GFX90A-NEXT: v_mov_b32_e32 v12, v4
-; GFX90A-NEXT: v_mov_b32_e32 v13, v5
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[10:13], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v2
+; GFX90A-NEXT: v_mov_b32_e32 v1, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2684,21 +2486,17 @@ define void @v_shuffle_v4p0_v2p0__3_1_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v4
-; GFX942-NEXT: v_mov_b32_e32 v11, v5
-; GFX942-NEXT: v_mov_b32_e32 v12, v4
-; GFX942-NEXT: v_mov_b32_e32 v13, v5
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[10:13], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v2
+; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -2712,54 +2510,49 @@ define void @v_shuffle_v4p0_v2p0__3_3_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v2p0__3_3_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: v_mov_b32_e32 v6, v0
-; GFX900-NEXT: v_mov_b32_e32 v7, v1
; GFX900-NEXT: v_mov_b32_e32 v0, v2
; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v2p0__3_3_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: v_mov_b32_e32 v7, v1
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v1
; GFX90A-NEXT: v_mov_b32_e32 v0, v2
; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_3_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_mov_b32_e32 v0, v2
; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -2773,48 +2566,42 @@ define void @v_shuffle_v4p0_v2p0__3_3_u_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v2p0__3_3_u_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v0, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v2p0__3_3_u_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: v_mov_b32_e32 v7, v1
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v0, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_3_u_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v0, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v0, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -2831,15 +2618,14 @@ define void @v_shuffle_v4p0_v2p0__3_3_0_2(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2849,15 +2635,14 @@ define void @v_shuffle_v4p0_v2p0__3_3_0_2(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2867,15 +2652,14 @@ define void @v_shuffle_v4p0_v2p0__3_3_0_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -2889,63 +2673,51 @@ define void @v_shuffle_v4p0_v2p0__3_3_1_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v2p0__3_3_1_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v2p0__3_3_1_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_3_1_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -2959,42 +2731,43 @@ define void @v_shuffle_v4p0_v2p0__u_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v2p0__u_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v2p0__u_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v2p0__u_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -3009,16 +2782,18 @@ define void @v_shuffle_v4p0_v2p0__0_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[6:9]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: v_mov_b32_e32 v8, v2
+; GFX900-NEXT: v_mov_b32_e32 v9, v3
+; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3026,16 +2801,18 @@ define void @v_shuffle_v4p0_v2p0__0_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[6:9]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: v_mov_b32_e32 v8, v2
+; GFX90A-NEXT: v_mov_b32_e32 v9, v3
+; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3043,17 +2820,19 @@ define void @v_shuffle_v4p0_v2p0__0_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[6:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: v_mov_b32_e32 v8, v2
+; GFX942-NEXT: v_mov_b32_e32 v9, v3
+; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -3070,17 +2849,16 @@ define void @v_shuffle_v4p0_v2p0__1_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
; GFX900-NEXT: v_mov_b32_e32 v4, v6
; GFX900-NEXT: v_mov_b32_e32 v5, v7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v2
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3090,17 +2868,16 @@ define void @v_shuffle_v4p0_v2p0__1_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
; GFX90A-NEXT: v_mov_b32_e32 v4, v6
; GFX90A-NEXT: v_mov_b32_e32 v5, v7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v3
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3110,17 +2887,16 @@ define void @v_shuffle_v4p0_v2p0__1_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
; GFX942-NEXT: v_mov_b32_e32 v4, v6
; GFX942-NEXT: v_mov_b32_e32 v5, v7
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v3
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -3134,49 +2910,43 @@ define void @v_shuffle_v4p0_v2p0__2_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v2p0__2_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v4, v2
; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: v_mov_b32_e32 v6, v2
-; GFX900-NEXT: v_mov_b32_e32 v7, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v2p0__2_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: v_mov_b32_e32 v8, v2
-; GFX90A-NEXT: v_mov_b32_e32 v9, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v2p0__2_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: v_mov_b32_e32 v8, v2
-; GFX942-NEXT: v_mov_b32_e32 v9, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -3190,42 +2960,43 @@ define void @v_shuffle_v4p0_v2p0__3_u_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v2p0__3_u_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v2p0__3_u_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_u_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -3239,61 +3010,61 @@ define void @v_shuffle_v4p0_v2p0__3_0_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v2p0__3_0_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[6:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v6
+; GFX900-NEXT: v_mov_b32_e32 v5, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v2p0__3_0_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[6:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v6
+; GFX90A-NEXT: v_mov_b32_e32 v5, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_0_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[6:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v6
+; GFX942-NEXT: v_mov_b32_e32 v5, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -3308,18 +3079,19 @@ define void @v_shuffle_v4p0_v2p0__3_1_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
; GFX900-NEXT: v_mov_b32_e32 v4, v6
; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3327,18 +3099,19 @@ define void @v_shuffle_v4p0_v2p0__3_1_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
; GFX90A-NEXT: v_mov_b32_e32 v4, v6
; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3346,18 +3119,20 @@ define void @v_shuffle_v4p0_v2p0__3_1_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_mov_b32_e32 v4, v6
; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -3371,58 +3146,52 @@ define void @v_shuffle_v4p0_v2p0__3_2_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v2p0__3_2_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v4, v2
; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: v_mov_b32_e32 v6, v2
-; GFX900-NEXT: v_mov_b32_e32 v7, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
; GFX900-NEXT: s_nop 0
; GFX900-NEXT: v_mov_b32_e32 v4, v0
; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v2p0__3_2_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: v_mov_b32_e32 v8, v2
-; GFX90A-NEXT: v_mov_b32_e32 v9, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_2_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: v_mov_b32_e32 v8, v2
-; GFX942-NEXT: v_mov_b32_e32 v9, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16
; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -3436,45 +3205,42 @@ define void @v_shuffle_v4p0_v2p0__3_3_u_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v2p0__3_3_u_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v2p0__3_3_u_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_3_u_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -3489,16 +3255,19 @@ define void @v_shuffle_v4p0_v2p0__3_3_0_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: v_mov_b32_e32 v7, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3506,16 +3275,19 @@ define void @v_shuffle_v4p0_v2p0__3_3_0_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v2
+; GFX90A-NEXT: v_mov_b32_e32 v7, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3523,17 +3295,20 @@ define void @v_shuffle_v4p0_v2p0__3_3_0_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, v2
+; GFX942-NEXT: v_mov_b32_e32 v7, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -3550,17 +3325,16 @@ define void @v_shuffle_v4p0_v2p0__3_3_1_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v6
+; GFX900-NEXT: v_mov_b32_e32 v5, v7
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v2
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3570,17 +3344,16 @@ define void @v_shuffle_v4p0_v2p0__3_3_1_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v6
+; GFX90A-NEXT: v_mov_b32_e32 v5, v7
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v3
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3590,18 +3363,16 @@ define void @v_shuffle_v4p0_v2p0__3_3_1_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v6
+; GFX942-NEXT: v_mov_b32_e32 v5, v7
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v3
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
@@ -3615,45 +3386,42 @@ define void @v_shuffle_v4p0_v2p0__3_3_2_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v2p0__3_3_2_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v2p0__3_3_2_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_3_2_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=v"()
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v3p0.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v3p0.ll
index 09e497259766e..8ab9f381704e7 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v3p0.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v3p0.ll
@@ -100,39 +100,33 @@ define void @v_shuffle_v4p0_v3p0__2_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v3p0__2_u_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v3p0__2_u_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v3p0__2_u_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -196,39 +190,33 @@ define void @v_shuffle_v4p0_v3p0__5_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v3p0__5_u_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v3p0__5_u_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_u_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -242,55 +230,42 @@ define void @v_shuffle_v4p0_v3p0__5_0_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v3p0__5_0_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:7]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v3p0__5_0_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:7]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_0_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:7]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -304,49 +279,43 @@ define void @v_shuffle_v4p0_v3p0__5_1_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v3p0__5_1_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
-; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v8
-; GFX900-NEXT: v_mov_b32_e32 v1, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v3p0__5_1_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
-; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v8
-; GFX90A-NEXT: v_mov_b32_e32 v1, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_1_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
-; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v8
-; GFX942-NEXT: v_mov_b32_e32 v1, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -360,49 +329,43 @@ define void @v_shuffle_v4p0_v3p0__5_2_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v3p0__5_2_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[2:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:11]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, v10
-; GFX900-NEXT: v_mov_b32_e32 v3, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v3p0__5_2_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[2:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:11]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, v10
-; GFX90A-NEXT: v_mov_b32_e32 v3, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_2_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:11]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v10
-; GFX942-NEXT: v_mov_b32_e32 v3, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -416,45 +379,40 @@ define void @v_shuffle_v4p0_v3p0__5_3_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v3p0__5_3_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: v_mov_b32_e32 v7, v1
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v3p0__5_3_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v0
+; GFX90A-NEXT: v_mov_b32_e32 v7, v1
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_3_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v0
+; GFX942-NEXT: v_mov_b32_e32 v7, v1
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -468,39 +426,40 @@ define void @v_shuffle_v4p0_v3p0__5_4_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v3p0__5_4_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: v_mov_b32_e32 v7, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v3p0__5_4_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v2
+; GFX90A-NEXT: v_mov_b32_e32 v7, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_4_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v2
+; GFX942-NEXT: v_mov_b32_e32 v7, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -514,39 +473,40 @@ define void @v_shuffle_v4p0_v3p0__5_5_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v3p0__5_5_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v3p0__5_5_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -560,51 +520,54 @@ define void @v_shuffle_v4p0_v3p0__5_5_0_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v3p0__5_5_0_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:7]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v3p0__5_5_0_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:7]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_0_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:7]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -618,51 +581,54 @@ define void @v_shuffle_v4p0_v3p0__5_5_1_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v3p0__5_5_1_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:9]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v8
-; GFX900-NEXT: v_mov_b32_e32 v7, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v3p0__5_5_1_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:9]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v8
-; GFX90A-NEXT: v_mov_b32_e32 v7, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_1_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
-; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v6, v8
-; GFX942-NEXT: v_mov_b32_e32 v7, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -676,57 +642,51 @@ define void @v_shuffle_v4p0_v3p0__5_5_2_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v3p0__5_5_2_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v12, v10
+; GFX900-NEXT: v_mov_b32_e32 v13, v11
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:11]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v8, v10
-; GFX900-NEXT: v_mov_b32_e32 v9, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v3p0__5_5_2_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v12, v10
+; GFX90A-NEXT: v_mov_b32_e32 v13, v11
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:11]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: v_mov_b32_e32 v8, v10
-; GFX90A-NEXT: v_mov_b32_e32 v9, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_2_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:11]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: v_mov_b32_e32 v8, v10
-; GFX942-NEXT: v_mov_b32_e32 v9, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v12, v10
+; GFX942-NEXT: v_mov_b32_e32 v13, v11
+; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -740,45 +700,42 @@ define void @v_shuffle_v4p0_v3p0__5_5_3_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v3p0__5_5_3_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v3p0__5_5_3_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_3_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -792,45 +749,42 @@ define void @v_shuffle_v4p0_v3p0__5_5_4_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v3p0__5_5_4_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v3p0__5_5_4_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_4_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -844,51 +798,45 @@ define void @v_shuffle_v4p0_v3p0__5_5_5_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v3p0__5_5_5_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v3p0__5_5_5_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_5_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -902,67 +850,54 @@ define void @v_shuffle_v4p0_v3p0__5_5_5_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v3p0__5_5_5_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:7]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17] offset:16
; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v3p0__5_5_5_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:7]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17] offset:16
; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_5_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:7]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1] offset:16
; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -976,58 +911,55 @@ define void @v_shuffle_v4p0_v3p0__5_5_5_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v3p0__5_5_5_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:9]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v8
-; GFX900-NEXT: v_mov_b32_e32 v1, v9
-; GFX900-NEXT: v_mov_b32_e32 v6, v8
-; GFX900-NEXT: v_mov_b32_e32 v7, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v3p0__5_5_5_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:9]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v8
-; GFX90A-NEXT: v_mov_b32_e32 v1, v9
-; GFX90A-NEXT: v_mov_b32_e32 v6, v8
-; GFX90A-NEXT: v_mov_b32_e32 v7, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_5_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
-; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v8
-; GFX942-NEXT: v_mov_b32_e32 v1, v9
-; GFX942-NEXT: v_mov_b32_e32 v6, v8
-; GFX942-NEXT: v_mov_b32_e32 v7, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -1041,57 +973,55 @@ define void @v_shuffle_v4p0_v3p0__5_5_5_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v3p0__5_5_5_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[2:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:11]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v10
-; GFX900-NEXT: v_mov_b32_e32 v3, v11
-; GFX900-NEXT: v_mov_b32_e32 v8, v10
-; GFX900-NEXT: v_mov_b32_e32 v9, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v3p0__5_5_5_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[2:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:11]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v10
-; GFX90A-NEXT: v_mov_b32_e32 v3, v11
-; GFX90A-NEXT: v_mov_b32_e32 v8, v10
-; GFX90A-NEXT: v_mov_b32_e32 v9, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_5_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:11]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v10
-; GFX942-NEXT: v_mov_b32_e32 v3, v11
-; GFX942-NEXT: v_mov_b32_e32 v8, v10
-; GFX942-NEXT: v_mov_b32_e32 v9, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -1105,54 +1035,52 @@ define void @v_shuffle_v4p0_v3p0__5_5_5_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v3p0__5_5_5_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: v_mov_b32_e32 v7, v1
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
; GFX900-NEXT: v_mov_b32_e32 v6, v4
; GFX900-NEXT: v_mov_b32_e32 v7, v5
-; GFX900-NEXT: v_mov_b32_e32 v8, v0
-; GFX900-NEXT: v_mov_b32_e32 v9, v1
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v3p0__5_5_5_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v4
-; GFX90A-NEXT: v_mov_b32_e32 v9, v5
-; GFX90A-NEXT: v_mov_b32_e32 v10, v0
-; GFX90A-NEXT: v_mov_b32_e32 v11, v1
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[8:11], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v0
+; GFX90A-NEXT: v_mov_b32_e32 v7, v1
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_5_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v4
-; GFX942-NEXT: v_mov_b32_e32 v9, v5
-; GFX942-NEXT: v_mov_b32_e32 v10, v0
-; GFX942-NEXT: v_mov_b32_e32 v11, v1
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[8:11], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v0
+; GFX942-NEXT: v_mov_b32_e32 v7, v1
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -1166,51 +1094,52 @@ define void @v_shuffle_v4p0_v3p0__5_5_5_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v3p0__5_5_5_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: v_mov_b32_e32 v7, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v3p0__5_5_5_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v6, v2
+; GFX90A-NEXT: v_mov_b32_e32 v7, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_5_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v2
+; GFX942-NEXT: v_mov_b32_e32 v7, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -1224,42 +1153,43 @@ define void @v_shuffle_v4p0_v3p0__5_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v3p0__5_5_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v3p0__5_5_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -1274,13 +1204,13 @@ define void @v_shuffle_v4p0_v3p0__u_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[2:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v0, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1288,13 +1218,13 @@ define void @v_shuffle_v4p0_v3p0__u_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[2:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v0, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1302,13 +1232,13 @@ define void @v_shuffle_v4p0_v3p0__u_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v0, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v0, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -1370,15 +1300,15 @@ define void @v_shuffle_v4p0_v3p0__1_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[2:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: v_mov_b32_e32 v6, v0
-; GFX900-NEXT: v_mov_b32_e32 v7, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, v2
+; GFX900-NEXT: v_mov_b32_e32 v1, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: v_mov_b32_e32 v7, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1386,17 +1316,15 @@ define void @v_shuffle_v4p0_v3p0__1_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[2:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v1
-; GFX90A-NEXT: v_mov_b32_e32 v10, v0
-; GFX90A-NEXT: v_mov_b32_e32 v11, v1
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[8:11], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v2
+; GFX90A-NEXT: v_mov_b32_e32 v1, v3
+; GFX90A-NEXT: v_mov_b32_e32 v6, v2
+; GFX90A-NEXT: v_mov_b32_e32 v7, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1404,17 +1332,15 @@ define void @v_shuffle_v4p0_v3p0__1_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
-; GFX942-NEXT: v_mov_b32_e32 v10, v0
-; GFX942-NEXT: v_mov_b32_e32 v11, v1
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[8:11], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v2
+; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: v_mov_b32_e32 v6, v2
+; GFX942-NEXT: v_mov_b32_e32 v7, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -1430,16 +1356,13 @@ define void @v_shuffle_v4p0_v3p0__2_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v0
; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: v_mov_b32_e32 v7, v1
+; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1449,16 +1372,13 @@ define void @v_shuffle_v4p0_v3p0__2_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v0
; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v0
+; GFX90A-NEXT: v_mov_b32_e32 v7, v1
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1468,16 +1388,13 @@ define void @v_shuffle_v4p0_v3p0__2_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v0
; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, v0
+; GFX942-NEXT: v_mov_b32_e32 v7, v1
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -1491,13 +1408,13 @@ define void @v_shuffle_v4p0_v3p0__3_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[2:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v0, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1505,13 +1422,13 @@ define void @v_shuffle_v4p0_v3p0__3_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[2:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v0, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1519,13 +1436,13 @@ define void @v_shuffle_v4p0_v3p0__3_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v0, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v0, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -1604,20 +1521,16 @@ define void @v_shuffle_v4p0_v3p0__5_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:7]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1625,20 +1538,16 @@ define void @v_shuffle_v4p0_v3p0__5_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:7]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1646,21 +1555,16 @@ define void @v_shuffle_v4p0_v3p0__5_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:7]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -1675,19 +1579,16 @@ define void @v_shuffle_v4p0_v3p0__5_u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:7]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1695,19 +1596,16 @@ define void @v_shuffle_v4p0_v3p0__5_u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:7]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1715,20 +1613,16 @@ define void @v_shuffle_v4p0_v3p0__5_u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:7]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -1742,64 +1636,58 @@ define void @v_shuffle_v4p0_v3p0__5_1_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v3p0__5_1_0_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[2:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v0, v2
+; GFX900-NEXT: v_mov_b32_e32 v1, v3
+; GFX900-NEXT: v_mov_b32_e32 v12, v4
+; GFX900-NEXT: v_mov_b32_e32 v13, v5
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:9]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: v_mov_b32_e32 v6, v0
-; GFX900-NEXT: v_mov_b32_e32 v7, v1
-; GFX900-NEXT: v_mov_b32_e32 v0, v8
-; GFX900-NEXT: v_mov_b32_e32 v1, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v14, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v3p0__5_1_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[2:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v0, v2
+; GFX90A-NEXT: v_mov_b32_e32 v1, v3
+; GFX90A-NEXT: v_mov_b32_e32 v12, v4
+; GFX90A-NEXT: v_mov_b32_e32 v13, v5
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:9]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: v_mov_b32_e32 v7, v1
-; GFX90A-NEXT: v_mov_b32_e32 v0, v8
-; GFX90A-NEXT: v_mov_b32_e32 v1, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v14, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_1_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v2
+; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: v_mov_b32_e32 v12, v4
+; GFX942-NEXT: v_mov_b32_e32 v13, v5
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:9]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
-; GFX942-NEXT: v_mov_b32_e32 v0, v8
-; GFX942-NEXT: v_mov_b32_e32 v1, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v14, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -1814,19 +1702,19 @@ define void @v_shuffle_v4p0_v3p0__5_2_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:11]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v10
-; GFX900-NEXT: v_mov_b32_e32 v3, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v10
+; GFX900-NEXT: v_mov_b32_e32 v7, v11
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1834,19 +1722,19 @@ define void @v_shuffle_v4p0_v3p0__5_2_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:11]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v10
-; GFX90A-NEXT: v_mov_b32_e32 v3, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v10
+; GFX90A-NEXT: v_mov_b32_e32 v7, v11
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1854,19 +1742,19 @@ define void @v_shuffle_v4p0_v3p0__5_2_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v12, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:11]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16
; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v10
-; GFX942-NEXT: v_mov_b32_e32 v3, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, v10
+; GFX942-NEXT: v_mov_b32_e32 v7, v11
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -1881,21 +1769,19 @@ define void @v_shuffle_v4p0_v3p0__5_3_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:9]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v8
-; GFX900-NEXT: v_mov_b32_e32 v1, v9
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: v_mov_b32_e32 v7, v1
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1903,21 +1789,19 @@ define void @v_shuffle_v4p0_v3p0__5_3_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:9]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v8
-; GFX90A-NEXT: v_mov_b32_e32 v1, v9
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v0
+; GFX90A-NEXT: v_mov_b32_e32 v7, v1
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1925,21 +1809,19 @@ define void @v_shuffle_v4p0_v3p0__5_3_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:9]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16
; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v0, v8
-; GFX942-NEXT: v_mov_b32_e32 v1, v9
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, v0
+; GFX942-NEXT: v_mov_b32_e32 v7, v1
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -1954,19 +1836,19 @@ define void @v_shuffle_v4p0_v3p0__5_4_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:7]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: v_mov_b32_e32 v7, v3
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1974,19 +1856,19 @@ define void @v_shuffle_v4p0_v3p0__5_4_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:7]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v2
+; GFX90A-NEXT: v_mov_b32_e32 v7, v3
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1994,20 +1876,19 @@ define void @v_shuffle_v4p0_v3p0__5_4_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:7]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16
; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, v2
+; GFX942-NEXT: v_mov_b32_e32 v7, v3
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -2022,18 +1903,19 @@ define void @v_shuffle_v4p0_v3p0__5_5_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:7]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2041,18 +1923,19 @@ define void @v_shuffle_v4p0_v3p0__5_5_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:7]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2060,19 +1943,19 @@ define void @v_shuffle_v4p0_v3p0__5_5_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:7]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -2087,18 +1970,16 @@ define void @v_shuffle_v4p0_v3p0__5_5_u_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[2:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:7]
+; GFX900-NEXT: ; def v[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v10, v8
+; GFX900-NEXT: v_mov_b32_e32 v11, v9
+; GFX900-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v0, v[8:11], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2106,18 +1987,16 @@ define void @v_shuffle_v4p0_v3p0__5_5_u_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[2:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:7]
+; GFX90A-NEXT: ; def v[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v10, v8
+; GFX90A-NEXT: v_mov_b32_e32 v11, v9
+; GFX90A-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v0, v[8:11], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2125,19 +2004,16 @@ define void @v_shuffle_v4p0_v3p0__5_5_u_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:7]
+; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v0, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v10, v8
+; GFX942-NEXT: v_mov_b32_e32 v11, v9
+; GFX942-NEXT: global_store_dwordx4 v0, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -2152,18 +2028,18 @@ define void @v_shuffle_v4p0_v3p0__5_5_1_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:9]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: v_mov_b32_e32 v6, v8
-; GFX900-NEXT: v_mov_b32_e32 v7, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v10, v6
+; GFX900-NEXT: v_mov_b32_e32 v11, v7
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2171,18 +2047,18 @@ define void @v_shuffle_v4p0_v3p0__5_5_1_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:9]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: v_mov_b32_e32 v6, v8
-; GFX90A-NEXT: v_mov_b32_e32 v7, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v10, v6
+; GFX90A-NEXT: v_mov_b32_e32 v11, v7
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2190,19 +2066,18 @@ define void @v_shuffle_v4p0_v3p0__5_5_1_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:9]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: v_mov_b32_e32 v6, v8
-; GFX942-NEXT: v_mov_b32_e32 v7, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v10, v6
+; GFX942-NEXT: v_mov_b32_e32 v11, v7
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -2217,20 +2092,18 @@ define void @v_shuffle_v4p0_v3p0__5_5_2_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:11]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: v_mov_b32_e32 v8, v10
-; GFX900-NEXT: v_mov_b32_e32 v9, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: v_mov_b32_e32 v7, v1
+; GFX900-NEXT: v_mov_b32_e32 v12, v10
+; GFX900-NEXT: v_mov_b32_e32 v13, v11
+; GFX900-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2238,20 +2111,18 @@ define void @v_shuffle_v4p0_v3p0__5_5_2_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:11]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: v_mov_b32_e32 v8, v10
-; GFX90A-NEXT: v_mov_b32_e32 v9, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v0
+; GFX90A-NEXT: v_mov_b32_e32 v7, v1
+; GFX90A-NEXT: v_mov_b32_e32 v12, v10
+; GFX90A-NEXT: v_mov_b32_e32 v13, v11
+; GFX90A-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2259,20 +2130,18 @@ define void @v_shuffle_v4p0_v3p0__5_5_2_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:11]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: v_mov_b32_e32 v8, v10
-; GFX942-NEXT: v_mov_b32_e32 v9, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v12, v10
+; GFX942-NEXT: v_mov_b32_e32 v6, v0
+; GFX942-NEXT: v_mov_b32_e32 v7, v1
+; GFX942-NEXT: v_mov_b32_e32 v13, v11
+; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -2287,19 +2156,18 @@ define void @v_shuffle_v4p0_v3p0__5_5_3_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:7]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v6
+; GFX900-NEXT: v_mov_b32_e32 v3, v7
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2307,19 +2175,18 @@ define void @v_shuffle_v4p0_v3p0__5_5_3_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:7]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v6
+; GFX90A-NEXT: v_mov_b32_e32 v3, v7
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2327,20 +2194,18 @@ define void @v_shuffle_v4p0_v3p0__5_5_3_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:7]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v6
+; GFX942-NEXT: v_mov_b32_e32 v3, v7
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -2355,21 +2220,18 @@ define void @v_shuffle_v4p0_v3p0__5_5_4_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[2:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:7]
+; GFX900-NEXT: ; def v[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, v6
+; GFX900-NEXT: v_mov_b32_e32 v1, v7
+; GFX900-NEXT: v_mov_b32_e32 v10, v8
+; GFX900-NEXT: v_mov_b32_e32 v11, v9
+; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2377,21 +2239,18 @@ define void @v_shuffle_v4p0_v3p0__5_5_4_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[2:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:7]
+; GFX90A-NEXT: ; def v[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v6
+; GFX90A-NEXT: v_mov_b32_e32 v1, v7
+; GFX90A-NEXT: v_mov_b32_e32 v10, v8
+; GFX90A-NEXT: v_mov_b32_e32 v11, v9
+; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2399,22 +2258,19 @@ define void @v_shuffle_v4p0_v3p0__5_5_4_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:7]
+; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v6
+; GFX942-NEXT: v_mov_b32_e32 v1, v7
+; GFX942-NEXT: v_mov_b32_e32 v10, v8
+; GFX942-NEXT: v_mov_b32_e32 v11, v9
+; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -2575,14 +2431,13 @@ define void @v_shuffle_v4p0_v3p0__2_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v2
; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: v_mov_b32_e32 v7, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2592,14 +2447,13 @@ define void @v_shuffle_v4p0_v3p0__2_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v2
; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v2
+; GFX90A-NEXT: v_mov_b32_e32 v7, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2609,14 +2463,13 @@ define void @v_shuffle_v4p0_v3p0__2_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v2
; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, v2
+; GFX942-NEXT: v_mov_b32_e32 v7, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -2743,18 +2596,16 @@ define void @v_shuffle_v4p0_v3p0__5_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v2
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: v_mov_b32_e32 v0, v8
-; GFX900-NEXT: v_mov_b32_e32 v1, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2762,18 +2613,16 @@ define void @v_shuffle_v4p0_v3p0__5_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v3
-; GFX90A-NEXT: v_mov_b32_e32 v0, v8
-; GFX90A-NEXT: v_mov_b32_e32 v1, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2781,19 +2630,16 @@ define void @v_shuffle_v4p0_v3p0__5_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:9]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, v8
-; GFX942-NEXT: v_mov_b32_e32 v1, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -2808,18 +2654,16 @@ define void @v_shuffle_v4p0_v3p0__5_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v2
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: v_mov_b32_e32 v0, v8
-; GFX900-NEXT: v_mov_b32_e32 v1, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2827,18 +2671,16 @@ define void @v_shuffle_v4p0_v3p0__5_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v3
-; GFX90A-NEXT: v_mov_b32_e32 v0, v8
-; GFX90A-NEXT: v_mov_b32_e32 v1, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2846,19 +2688,16 @@ define void @v_shuffle_v4p0_v3p0__5_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:9]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, v8
-; GFX942-NEXT: v_mov_b32_e32 v1, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -2873,21 +2712,16 @@ define void @v_shuffle_v4p0_v3p0__5_0_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, v8
+; GFX900-NEXT: v_mov_b32_e32 v11, v9
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:9]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v2
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v8
-; GFX900-NEXT: v_mov_b32_e32 v3, v9
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2895,21 +2729,16 @@ define void @v_shuffle_v4p0_v3p0__5_0_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, v8
+; GFX90A-NEXT: v_mov_b32_e32 v11, v9
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:9]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v3
-; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v8
-; GFX90A-NEXT: v_mov_b32_e32 v3, v9
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2917,22 +2746,16 @@ define void @v_shuffle_v4p0_v3p0__5_0_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, v8
+; GFX942-NEXT: v_mov_b32_e32 v11, v9
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:9]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v3
-; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v8
-; GFX942-NEXT: v_mov_b32_e32 v3, v9
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -2947,19 +2770,19 @@ define void @v_shuffle_v4p0_v3p0__5_2_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v8
+; GFX900-NEXT: v_mov_b32_e32 v7, v9
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:11]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v10
-; GFX900-NEXT: v_mov_b32_e32 v3, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v10
+; GFX900-NEXT: v_mov_b32_e32 v7, v11
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2967,19 +2790,19 @@ define void @v_shuffle_v4p0_v3p0__5_2_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
+; GFX90A-NEXT: v_mov_b32_e32 v6, v8
+; GFX90A-NEXT: v_mov_b32_e32 v7, v9
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:11]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v10
-; GFX90A-NEXT: v_mov_b32_e32 v3, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v10
+; GFX90A-NEXT: v_mov_b32_e32 v7, v11
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2987,19 +2810,19 @@ define void @v_shuffle_v4p0_v3p0__5_2_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v12, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: v_mov_b32_e32 v6, v8
+; GFX942-NEXT: v_mov_b32_e32 v7, v9
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:11]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16
; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v10
-; GFX942-NEXT: v_mov_b32_e32 v3, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, v10
+; GFX942-NEXT: v_mov_b32_e32 v7, v11
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -3014,20 +2837,19 @@ define void @v_shuffle_v4p0_v3p0__5_3_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v2
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:11]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v0, v10
-; GFX900-NEXT: v_mov_b32_e32 v1, v11
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: v_mov_b32_e32 v7, v1
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3035,20 +2857,19 @@ define void @v_shuffle_v4p0_v3p0__5_3_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:11]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v0, v10
-; GFX90A-NEXT: v_mov_b32_e32 v1, v11
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, v0
+; GFX90A-NEXT: v_mov_b32_e32 v7, v1
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3056,20 +2877,19 @@ define void @v_shuffle_v4p0_v3p0__5_3_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:11]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v0, v10
-; GFX942-NEXT: v_mov_b32_e32 v1, v11
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v6, v0
+; GFX942-NEXT: v_mov_b32_e32 v7, v1
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -3084,18 +2904,18 @@ define void @v_shuffle_v4p0_v3p0__5_4_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v2
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v8
-; GFX900-NEXT: v_mov_b32_e32 v5, v9
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: v_mov_b32_e32 v7, v3
; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
@@ -3104,18 +2924,18 @@ define void @v_shuffle_v4p0_v3p0__5_4_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v3
-; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v8
-; GFX90A-NEXT: v_mov_b32_e32 v5, v9
+; GFX90A-NEXT: v_mov_b32_e32 v6, v2
+; GFX90A-NEXT: v_mov_b32_e32 v7, v3
; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
@@ -3124,19 +2944,18 @@ define void @v_shuffle_v4p0_v3p0__5_4_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:9]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v3
-; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v4, v8
-; GFX942-NEXT: v_mov_b32_e32 v5, v9
+; GFX942-NEXT: v_mov_b32_e32 v6, v2
+; GFX942-NEXT: v_mov_b32_e32 v7, v3
; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3152,18 +2971,19 @@ define void @v_shuffle_v4p0_v3p0__5_5_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v2
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: v_mov_b32_e32 v6, v8
-; GFX900-NEXT: v_mov_b32_e32 v7, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3171,18 +2991,19 @@ define void @v_shuffle_v4p0_v3p0__5_5_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v3
-; GFX90A-NEXT: v_mov_b32_e32 v6, v8
-; GFX90A-NEXT: v_mov_b32_e32 v7, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3190,19 +3011,19 @@ define void @v_shuffle_v4p0_v3p0__5_5_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:9]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v3
-; GFX942-NEXT: v_mov_b32_e32 v6, v8
-; GFX942-NEXT: v_mov_b32_e32 v7, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -3216,51 +3037,55 @@ define void @v_shuffle_v4p0_v3p0__5_5_u_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v3p0__5_5_u_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:9]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v8
-; GFX900-NEXT: v_mov_b32_e32 v7, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v3p0__5_5_u_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:9]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v8
-; GFX90A-NEXT: v_mov_b32_e32 v7, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_u_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
-; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v6, v8
-; GFX942-NEXT: v_mov_b32_e32 v7, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -3274,51 +3099,54 @@ define void @v_shuffle_v4p0_v3p0__5_5_0_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v3p0__5_5_0_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:9]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v8
-; GFX900-NEXT: v_mov_b32_e32 v7, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v3p0__5_5_0_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:9]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v8
-; GFX90A-NEXT: v_mov_b32_e32 v7, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_0_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:9]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v6, v8
-; GFX942-NEXT: v_mov_b32_e32 v7, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -3333,18 +3161,18 @@ define void @v_shuffle_v4p0_v3p0__5_5_2_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:11]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v8, v10
-; GFX900-NEXT: v_mov_b32_e32 v9, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: v_mov_b32_e32 v7, v3
+; GFX900-NEXT: v_mov_b32_e32 v12, v10
+; GFX900-NEXT: v_mov_b32_e32 v13, v11
+; GFX900-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3352,18 +3180,18 @@ define void @v_shuffle_v4p0_v3p0__5_5_2_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:11]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: v_mov_b32_e32 v8, v10
-; GFX90A-NEXT: v_mov_b32_e32 v9, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v2
+; GFX90A-NEXT: v_mov_b32_e32 v7, v3
+; GFX90A-NEXT: v_mov_b32_e32 v12, v10
+; GFX90A-NEXT: v_mov_b32_e32 v13, v11
+; GFX90A-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3371,18 +3199,18 @@ define void @v_shuffle_v4p0_v3p0__5_5_2_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:11]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: v_mov_b32_e32 v8, v10
-; GFX942-NEXT: v_mov_b32_e32 v9, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v12, v10
+; GFX942-NEXT: v_mov_b32_e32 v6, v2
+; GFX942-NEXT: v_mov_b32_e32 v7, v3
+; GFX942-NEXT: v_mov_b32_e32 v13, v11
+; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -3397,39 +3225,37 @@ define void @v_shuffle_v4p0_v3p0__5_5_3_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:9]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v2
-; GFX900-NEXT: v_mov_b32_e32 v7, v3
-; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v8
-; GFX900-NEXT: v_mov_b32_e32 v7, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
-; GFX900-NEXT: s_waitcnt vmcnt(0)
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: v_shuffle_v4p0_v3p0__5_5_3_1:
+; GFX900-NEXT: v_mov_b32_e32 v2, v6
+; GFX900-NEXT: v_mov_b32_e32 v3, v7
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: v_shuffle_v4p0_v3p0__5_5_3_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:9]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v8
-; GFX90A-NEXT: v_mov_b32_e32 v7, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v6
+; GFX90A-NEXT: v_mov_b32_e32 v3, v7
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3437,20 +3263,19 @@ define void @v_shuffle_v4p0_v3p0__5_5_3_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:9]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v6, v8
-; GFX942-NEXT: v_mov_b32_e32 v7, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v6
+; GFX942-NEXT: v_mov_b32_e32 v3, v7
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -3465,18 +3290,19 @@ define void @v_shuffle_v4p0_v3p0__5_5_4_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:9]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v6, v8
-; GFX900-NEXT: v_mov_b32_e32 v7, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: v_mov_b32_e32 v7, v3
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3484,18 +3310,19 @@ define void @v_shuffle_v4p0_v3p0__5_5_4_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:9]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: v_mov_b32_e32 v6, v8
-; GFX90A-NEXT: v_mov_b32_e32 v7, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v2
+; GFX90A-NEXT: v_mov_b32_e32 v7, v3
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3503,19 +3330,20 @@ define void @v_shuffle_v4p0_v3p0__5_5_4_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:9]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: v_mov_b32_e32 v6, v8
-; GFX942-NEXT: v_mov_b32_e32 v7, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, v2
+; GFX942-NEXT: v_mov_b32_e32 v7, v3
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -3529,42 +3357,43 @@ define void @v_shuffle_v4p0_v3p0__u_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v3p0__u_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v3p0__u_2_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v3p0__u_2_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -3580,11 +3409,13 @@ define void @v_shuffle_v4p0_v3p0__0_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
; GFX900-NEXT: v_mov_b32_e32 v2, v4
; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3594,11 +3425,13 @@ define void @v_shuffle_v4p0_v3p0__0_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
; GFX90A-NEXT: v_mov_b32_e32 v2, v4
; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3608,11 +3441,13 @@ define void @v_shuffle_v4p0_v3p0__0_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
; GFX942-NEXT: v_mov_b32_e32 v2, v4
; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -3625,49 +3460,43 @@ define void @v_shuffle_v4p0_v3p0__1_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v3p0__1_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v6, v4
; GFX900-NEXT: v_mov_b32_e32 v7, v5
-; GFX900-NEXT: v_mov_b32_e32 v8, v4
-; GFX900-NEXT: v_mov_b32_e32 v9, v5
-; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v3p0__1_2_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, v4
-; GFX90A-NEXT: v_mov_b32_e32 v9, v5
-; GFX90A-NEXT: v_mov_b32_e32 v10, v4
-; GFX90A-NEXT: v_mov_b32_e32 v11, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[8:11], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v3p0__1_2_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v4
-; GFX942-NEXT: v_mov_b32_e32 v9, v5
-; GFX942-NEXT: v_mov_b32_e32 v10, v4
-; GFX942-NEXT: v_mov_b32_e32 v11, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[8:11], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -3680,42 +3509,43 @@ define void @v_shuffle_v4p0_v3p0__2_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v3p0__2_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v3p0__2_2_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v3p0__2_2_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -3728,42 +3558,43 @@ define void @v_shuffle_v4p0_v3p0__3_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v3p0__3_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v3p0__3_2_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v3p0__3_2_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -3777,17 +3608,17 @@ define void @v_shuffle_v4p0_v3p0__4_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
; GFX900-NEXT: v_mov_b32_e32 v10, v4
; GFX900-NEXT: v_mov_b32_e32 v11, v5
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17] offset:16
; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
@@ -3796,17 +3627,17 @@ define void @v_shuffle_v4p0_v3p0__4_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
; GFX90A-NEXT: v_mov_b32_e32 v10, v4
; GFX90A-NEXT: v_mov_b32_e32 v11, v5
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17] offset:16
; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
@@ -3815,17 +3646,18 @@ define void @v_shuffle_v4p0_v3p0__4_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v12, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
; GFX942-NEXT: v_mov_b32_e32 v10, v4
; GFX942-NEXT: v_mov_b32_e32 v11, v5
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3841,19 +3673,16 @@ define void @v_shuffle_v4p0_v3p0__5_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[2:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:11]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v10
-; GFX900-NEXT: v_mov_b32_e32 v3, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3861,19 +3690,16 @@ define void @v_shuffle_v4p0_v3p0__5_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[2:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:11]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v10
-; GFX90A-NEXT: v_mov_b32_e32 v3, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3881,19 +3707,16 @@ define void @v_shuffle_v4p0_v3p0__5_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:11]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v10
-; GFX942-NEXT: v_mov_b32_e32 v3, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -3908,18 +3731,16 @@ define void @v_shuffle_v4p0_v3p0__5_u_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:11]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v0, v10
-; GFX900-NEXT: v_mov_b32_e32 v1, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[10:13], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3927,18 +3748,16 @@ define void @v_shuffle_v4p0_v3p0__5_u_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:11]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v0, v10
-; GFX90A-NEXT: v_mov_b32_e32 v1, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[10:13], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3946,18 +3765,17 @@ define void @v_shuffle_v4p0_v3p0__5_u_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:11]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v0, v10
-; GFX942-NEXT: v_mov_b32_e32 v1, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -3971,66 +3789,52 @@ define void @v_shuffle_v4p0_v3p0__5_0_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v3p0__5_0_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
+; GFX900-NEXT: v_mov_b32_e32 v12, v10
+; GFX900-NEXT: v_mov_b32_e32 v13, v11
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:11]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v10
-; GFX900-NEXT: v_mov_b32_e32 v3, v11
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v3p0__5_0_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
+; GFX90A-NEXT: v_mov_b32_e32 v12, v10
+; GFX90A-NEXT: v_mov_b32_e32 v13, v11
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:11]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v10
-; GFX90A-NEXT: v_mov_b32_e32 v3, v11
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_0_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:11]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v10
-; GFX942-NEXT: v_mov_b32_e32 v3, v11
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v12, v10
+; GFX942-NEXT: v_mov_b32_e32 v13, v11
+; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -4044,63 +3848,52 @@ define void @v_shuffle_v4p0_v3p0__5_1_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v3p0__5_1_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[4:9]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v10, v8
+; GFX900-NEXT: v_mov_b32_e32 v11, v9
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:11]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v4
-; GFX900-NEXT: v_mov_b32_e32 v7, v5
-; GFX900-NEXT: v_mov_b32_e32 v8, v4
-; GFX900-NEXT: v_mov_b32_e32 v9, v5
-; GFX900-NEXT: v_mov_b32_e32 v0, v10
-; GFX900-NEXT: v_mov_b32_e32 v1, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v3p0__5_1_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[4:9]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v10, v8
+; GFX90A-NEXT: v_mov_b32_e32 v11, v9
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:11]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v5
-; GFX90A-NEXT: v_mov_b32_e32 v8, v4
-; GFX90A-NEXT: v_mov_b32_e32 v9, v5
-; GFX90A-NEXT: v_mov_b32_e32 v0, v10
-; GFX90A-NEXT: v_mov_b32_e32 v1, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_1_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v10, v8
+; GFX942-NEXT: v_mov_b32_e32 v11, v9
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:11]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v5
-; GFX942-NEXT: v_mov_b32_e32 v8, v4
-; GFX942-NEXT: v_mov_b32_e32 v9, v5
-; GFX942-NEXT: v_mov_b32_e32 v0, v10
-; GFX942-NEXT: v_mov_b32_e32 v1, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -4114,63 +3907,58 @@ define void @v_shuffle_v4p0_v3p0__5_3_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v3p0__5_3_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:11]
+; GFX900-NEXT: ; def v[8:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v0, v10
-; GFX900-NEXT: v_mov_b32_e32 v1, v11
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: v_mov_b32_e32 v14, v8
+; GFX900-NEXT: v_mov_b32_e32 v15, v9
+; GFX900-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v3p0__5_3_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:11]
+; GFX90A-NEXT: ; def v[8:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v0, v10
-; GFX90A-NEXT: v_mov_b32_e32 v1, v11
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: v_mov_b32_e32 v14, v8
+; GFX90A-NEXT: v_mov_b32_e32 v15, v9
+; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_3_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:11]
+; GFX942-NEXT: ; def v[8:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v0, v10
-; GFX942-NEXT: v_mov_b32_e32 v1, v11
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: v_mov_b32_e32 v14, v8
+; GFX942-NEXT: v_mov_b32_e32 v15, v9
+; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -4185,18 +3973,18 @@ define void @v_shuffle_v4p0_v3p0__5_4_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:11]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v6, v10
-; GFX900-NEXT: v_mov_b32_e32 v7, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: v_mov_b32_e32 v12, v8
+; GFX900-NEXT: v_mov_b32_e32 v13, v9
+; GFX900-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4204,18 +3992,18 @@ define void @v_shuffle_v4p0_v3p0__5_4_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:11]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v6, v10
-; GFX90A-NEXT: v_mov_b32_e32 v7, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: v_mov_b32_e32 v12, v8
+; GFX90A-NEXT: v_mov_b32_e32 v13, v9
+; GFX90A-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4223,18 +4011,18 @@ define void @v_shuffle_v4p0_v3p0__5_4_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:11]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v6, v10
-; GFX942-NEXT: v_mov_b32_e32 v7, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v12, v8
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: v_mov_b32_e32 v13, v9
+; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -4249,18 +4037,18 @@ define void @v_shuffle_v4p0_v3p0__5_5_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:11]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v8, v10
-; GFX900-NEXT: v_mov_b32_e32 v9, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: v_mov_b32_e32 v12, v10
+; GFX900-NEXT: v_mov_b32_e32 v13, v11
+; GFX900-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4268,18 +4056,18 @@ define void @v_shuffle_v4p0_v3p0__5_5_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:11]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v8, v10
-; GFX90A-NEXT: v_mov_b32_e32 v9, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: v_mov_b32_e32 v12, v10
+; GFX90A-NEXT: v_mov_b32_e32 v13, v11
+; GFX90A-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4287,18 +4075,18 @@ define void @v_shuffle_v4p0_v3p0__5_5_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:11]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v8, v10
-; GFX942-NEXT: v_mov_b32_e32 v9, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v12, v10
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: v_mov_b32_e32 v13, v11
+; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -4312,51 +4100,55 @@ define void @v_shuffle_v4p0_v3p0__5_5_u_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v3p0__5_5_u_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:11]
+; GFX900-NEXT: ; def v[2:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v8, v10
-; GFX900-NEXT: v_mov_b32_e32 v9, v11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v3p0__5_5_u_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:11]
+; GFX90A-NEXT: ; def v[2:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v10
-; GFX90A-NEXT: v_mov_b32_e32 v9, v11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_u_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:11]
+; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v10
-; GFX942-NEXT: v_mov_b32_e32 v9, v11
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -4371,18 +4163,19 @@ define void @v_shuffle_v4p0_v3p0__5_5_0_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
; GFX900-NEXT: v_mov_b32_e32 v8, v10
; GFX900-NEXT: v_mov_b32_e32 v9, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4390,18 +4183,19 @@ define void @v_shuffle_v4p0_v3p0__5_5_0_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
; GFX90A-NEXT: v_mov_b32_e32 v8, v10
; GFX90A-NEXT: v_mov_b32_e32 v9, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4409,18 +4203,19 @@ define void @v_shuffle_v4p0_v3p0__5_5_0_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v12, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
; GFX942-NEXT: v_mov_b32_e32 v8, v10
; GFX942-NEXT: v_mov_b32_e32 v9, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -4434,51 +4229,54 @@ define void @v_shuffle_v4p0_v3p0__5_5_1_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v3p0__5_5_1_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:11]
+; GFX900-NEXT: ; def v[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v8, v10
-; GFX900-NEXT: v_mov_b32_e32 v9, v11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v3p0__5_5_1_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:11]
+; GFX90A-NEXT: ; def v[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v10
-; GFX90A-NEXT: v_mov_b32_e32 v9, v11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_1_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:11]
+; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v10
-; GFX942-NEXT: v_mov_b32_e32 v9, v11
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -4493,19 +4291,18 @@ define void @v_shuffle_v4p0_v3p0__5_5_3_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:11]
+; GFX900-NEXT: ; def v[2:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, v4
-; GFX900-NEXT: v_mov_b32_e32 v9, v5
-; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v8, v10
-; GFX900-NEXT: v_mov_b32_e32 v9, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v6
+; GFX900-NEXT: v_mov_b32_e32 v3, v7
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4513,19 +4310,18 @@ define void @v_shuffle_v4p0_v3p0__5_5_3_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:11]
+; GFX90A-NEXT: ; def v[2:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, v4
-; GFX90A-NEXT: v_mov_b32_e32 v9, v5
-; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v10
-; GFX90A-NEXT: v_mov_b32_e32 v9, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v6
+; GFX90A-NEXT: v_mov_b32_e32 v3, v7
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4533,20 +4329,19 @@ define void @v_shuffle_v4p0_v3p0__5_5_3_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:11]
+; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v4
-; GFX942-NEXT: v_mov_b32_e32 v9, v5
-; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v8, v10
-; GFX942-NEXT: v_mov_b32_e32 v9, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v6
+; GFX942-NEXT: v_mov_b32_e32 v3, v7
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -4561,18 +4356,19 @@ define void @v_shuffle_v4p0_v3p0__5_5_4_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[4:9]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:11]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v8
-; GFX900-NEXT: v_mov_b32_e32 v3, v9
-; GFX900-NEXT: v_mov_b32_e32 v8, v10
-; GFX900-NEXT: v_mov_b32_e32 v9, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: v_mov_b32_e32 v7, v3
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4580,18 +4376,19 @@ define void @v_shuffle_v4p0_v3p0__5_5_4_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[4:9]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:11]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v8
-; GFX90A-NEXT: v_mov_b32_e32 v3, v9
-; GFX90A-NEXT: v_mov_b32_e32 v8, v10
-; GFX90A-NEXT: v_mov_b32_e32 v9, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v2
+; GFX90A-NEXT: v_mov_b32_e32 v7, v3
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4599,18 +4396,20 @@ define void @v_shuffle_v4p0_v3p0__5_5_4_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:11]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v8
-; GFX942-NEXT: v_mov_b32_e32 v3, v9
-; GFX942-NEXT: v_mov_b32_e32 v8, v10
-; GFX942-NEXT: v_mov_b32_e32 v9, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v2
+; GFX942-NEXT: v_mov_b32_e32 v7, v3
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -4716,39 +4515,33 @@ define void @v_shuffle_v4p0_v3p0__2_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v3p0__2_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v3p0__2_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v3p0__2_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -4773,15 +4566,15 @@ define void @v_shuffle_v4p0_v3p0__4_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[2:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: v_mov_b32_e32 v6, v0
-; GFX900-NEXT: v_mov_b32_e32 v7, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, v2
+; GFX900-NEXT: v_mov_b32_e32 v1, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: v_mov_b32_e32 v7, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4789,17 +4582,15 @@ define void @v_shuffle_v4p0_v3p0__4_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[2:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v1
-; GFX90A-NEXT: v_mov_b32_e32 v10, v0
-; GFX90A-NEXT: v_mov_b32_e32 v11, v1
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[8:11], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v2
+; GFX90A-NEXT: v_mov_b32_e32 v1, v3
+; GFX90A-NEXT: v_mov_b32_e32 v6, v2
+; GFX90A-NEXT: v_mov_b32_e32 v7, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4807,17 +4598,15 @@ define void @v_shuffle_v4p0_v3p0__4_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
-; GFX942-NEXT: v_mov_b32_e32 v10, v0
-; GFX942-NEXT: v_mov_b32_e32 v11, v1
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[8:11], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v2
+; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: v_mov_b32_e32 v6, v2
+; GFX942-NEXT: v_mov_b32_e32 v7, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -4834,16 +4623,13 @@ define void @v_shuffle_v4p0_v3p0__5_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v0
; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: v_mov_b32_e32 v7, v1
+; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4853,16 +4639,13 @@ define void @v_shuffle_v4p0_v3p0__5_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v0
; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v0
+; GFX90A-NEXT: v_mov_b32_e32 v7, v1
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4872,16 +4655,13 @@ define void @v_shuffle_v4p0_v3p0__5_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v0
; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, v0
+; GFX942-NEXT: v_mov_b32_e32 v7, v1
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -4902,10 +4682,7 @@ define void @v_shuffle_v4p0_v3p0__5_u_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: v_mov_b32_e32 v2, v0
; GFX900-NEXT: v_mov_b32_e32 v3, v1
; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4919,10 +4696,7 @@ define void @v_shuffle_v4p0_v3p0__5_u_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: v_mov_b32_e32 v2, v0
; GFX90A-NEXT: v_mov_b32_e32 v3, v1
; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4936,10 +4710,7 @@ define void @v_shuffle_v4p0_v3p0__5_u_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: v_mov_b32_e32 v2, v0
; GFX942-NEXT: v_mov_b32_e32 v3, v1
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -4956,19 +4727,14 @@ define void @v_shuffle_v4p0_v3p0__5_0_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:7]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v2
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4978,19 +4744,14 @@ define void @v_shuffle_v4p0_v3p0__5_0_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v1
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:7]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v3
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5000,20 +4761,14 @@ define void @v_shuffle_v4p0_v3p0__5_0_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v0
+; GFX942-NEXT: v_mov_b32_e32 v3, v1
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:7]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v3
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -5028,18 +4783,16 @@ define void @v_shuffle_v4p0_v3p0__5_1_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:9]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v4
-; GFX900-NEXT: v_mov_b32_e32 v7, v5
-; GFX900-NEXT: v_mov_b32_e32 v0, v8
-; GFX900-NEXT: v_mov_b32_e32 v1, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5047,18 +4800,16 @@ define void @v_shuffle_v4p0_v3p0__5_1_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:9]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v5
-; GFX90A-NEXT: v_mov_b32_e32 v0, v8
-; GFX90A-NEXT: v_mov_b32_e32 v1, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5066,19 +4817,17 @@ define void @v_shuffle_v4p0_v3p0__5_1_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:9]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v5
-; GFX942-NEXT: v_mov_b32_e32 v0, v8
-; GFX942-NEXT: v_mov_b32_e32 v1, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v0
+; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -5093,18 +4842,16 @@ define void @v_shuffle_v4p0_v3p0__5_2_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[2:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:11]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v8, v6
-; GFX900-NEXT: v_mov_b32_e32 v9, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v10
-; GFX900-NEXT: v_mov_b32_e32 v3, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5112,18 +4859,16 @@ define void @v_shuffle_v4p0_v3p0__5_2_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[2:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:11]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v6
-; GFX90A-NEXT: v_mov_b32_e32 v9, v7
-; GFX90A-NEXT: v_mov_b32_e32 v2, v10
-; GFX90A-NEXT: v_mov_b32_e32 v3, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5131,18 +4876,17 @@ define void @v_shuffle_v4p0_v3p0__5_2_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:11]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v6
-; GFX942-NEXT: v_mov_b32_e32 v9, v7
-; GFX942-NEXT: v_mov_b32_e32 v2, v10
-; GFX942-NEXT: v_mov_b32_e32 v3, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v0
+; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -5156,54 +4900,49 @@ define void @v_shuffle_v4p0_v3p0__5_4_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v3p0__5_4_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[2:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v0
-; GFX900-NEXT: v_mov_b32_e32 v7, v1
-; GFX900-NEXT: v_mov_b32_e32 v8, v0
-; GFX900-NEXT: v_mov_b32_e32 v9, v1
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, v2
+; GFX900-NEXT: v_mov_b32_e32 v1, v3
+; GFX900-NEXT: v_mov_b32_e32 v8, v4
+; GFX900-NEXT: v_mov_b32_e32 v9, v5
+; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v3p0__5_4_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[2:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v1
-; GFX90A-NEXT: v_mov_b32_e32 v10, v0
-; GFX90A-NEXT: v_mov_b32_e32 v11, v1
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[8:11], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v2
+; GFX90A-NEXT: v_mov_b32_e32 v1, v3
+; GFX90A-NEXT: v_mov_b32_e32 v8, v4
+; GFX90A-NEXT: v_mov_b32_e32 v9, v5
+; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_4_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
-; GFX942-NEXT: v_mov_b32_e32 v10, v0
-; GFX942-NEXT: v_mov_b32_e32 v11, v1
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[8:11], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v2
+; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: v_mov_b32_e32 v8, v4
+; GFX942-NEXT: v_mov_b32_e32 v9, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -5220,14 +4959,13 @@ define void @v_shuffle_v4p0_v3p0__5_5_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v0
; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5237,14 +4975,13 @@ define void @v_shuffle_v4p0_v3p0__5_5_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v0
; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5254,14 +4991,13 @@ define void @v_shuffle_v4p0_v3p0__5_5_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v0
; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -5275,51 +5011,42 @@ define void @v_shuffle_v4p0_v3p0__5_5_u_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v3p0__5_5_u_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[2:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v0, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v3p0__5_5_u_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[2:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v0, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_u_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v0, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v0, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -5336,14 +5063,14 @@ define void @v_shuffle_v4p0_v3p0__5_5_0_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[2:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5353,14 +5080,14 @@ define void @v_shuffle_v4p0_v3p0__5_5_0_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5370,14 +5097,14 @@ define void @v_shuffle_v4p0_v3p0__5_5_0_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -5394,14 +5121,14 @@ define void @v_shuffle_v4p0_v3p0__5_5_1_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v8
-; GFX900-NEXT: v_mov_b32_e32 v7, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v10, v8
+; GFX900-NEXT: v_mov_b32_e32 v11, v9
+; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5411,14 +5138,14 @@ define void @v_shuffle_v4p0_v3p0__5_5_1_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v8
-; GFX90A-NEXT: v_mov_b32_e32 v7, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v10, v8
+; GFX90A-NEXT: v_mov_b32_e32 v11, v9
+; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5428,14 +5155,14 @@ define void @v_shuffle_v4p0_v3p0__5_5_1_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v6, v8
-; GFX942-NEXT: v_mov_b32_e32 v7, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v10, v8
+; GFX942-NEXT: v_mov_b32_e32 v11, v9
+; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -5449,63 +5176,51 @@ define void @v_shuffle_v4p0_v3p0__5_5_2_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v3p0__5_5_2_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v12, v10
+; GFX900-NEXT: v_mov_b32_e32 v13, v11
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:11]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: v_mov_b32_e32 v8, v10
-; GFX900-NEXT: v_mov_b32_e32 v9, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v3p0__5_5_2_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v12, v10
+; GFX90A-NEXT: v_mov_b32_e32 v13, v11
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:11]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v8, v10
-; GFX90A-NEXT: v_mov_b32_e32 v9, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_2_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:11]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v8, v10
-; GFX942-NEXT: v_mov_b32_e32 v9, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v12, v10
+; GFX942-NEXT: v_mov_b32_e32 v13, v11
+; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -5519,54 +5234,49 @@ define void @v_shuffle_v4p0_v3p0__5_5_4_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v3p0__5_5_4_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[2:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v2
-; GFX900-NEXT: v_mov_b32_e32 v7, v3
-; GFX900-NEXT: v_mov_b32_e32 v8, v0
-; GFX900-NEXT: v_mov_b32_e32 v9, v1
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, v4
+; GFX900-NEXT: v_mov_b32_e32 v1, v5
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v3p0__5_5_4_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[2:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v2
-; GFX90A-NEXT: v_mov_b32_e32 v9, v3
-; GFX90A-NEXT: v_mov_b32_e32 v10, v0
-; GFX90A-NEXT: v_mov_b32_e32 v11, v1
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[8:11], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v4
+; GFX90A-NEXT: v_mov_b32_e32 v1, v5
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_4_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v2
-; GFX942-NEXT: v_mov_b32_e32 v9, v3
-; GFX942-NEXT: v_mov_b32_e32 v10, v0
-; GFX942-NEXT: v_mov_b32_e32 v11, v1
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[8:11], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v4
+; GFX942-NEXT: v_mov_b32_e32 v1, v5
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -5763,15 +5473,15 @@ define void @v_shuffle_v4p0_v3p0__2_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, v8
+; GFX900-NEXT: v_mov_b32_e32 v11, v9
+; GFX900-NEXT: v_mov_b32_e32 v6, v8
+; GFX900-NEXT: v_mov_b32_e32 v7, v9
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, v8
-; GFX900-NEXT: v_mov_b32_e32 v11, v9
-; GFX900-NEXT: v_mov_b32_e32 v6, v4
-; GFX900-NEXT: v_mov_b32_e32 v7, v5
; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5782,15 +5492,15 @@ define void @v_shuffle_v4p0_v3p0__2_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, v8
+; GFX90A-NEXT: v_mov_b32_e32 v11, v9
+; GFX90A-NEXT: v_mov_b32_e32 v6, v8
+; GFX90A-NEXT: v_mov_b32_e32 v7, v9
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, v8
-; GFX90A-NEXT: v_mov_b32_e32 v11, v9
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v5
; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5801,15 +5511,15 @@ define void @v_shuffle_v4p0_v3p0__2_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, v8
+; GFX942-NEXT: v_mov_b32_e32 v11, v9
+; GFX942-NEXT: v_mov_b32_e32 v6, v8
+; GFX942-NEXT: v_mov_b32_e32 v7, v9
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, v8
-; GFX942-NEXT: v_mov_b32_e32 v11, v9
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v5
; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -5924,14 +5634,13 @@ define void @v_shuffle_v4p0_v3p0__5_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v2
; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: v_mov_b32_e32 v7, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5941,14 +5650,13 @@ define void @v_shuffle_v4p0_v3p0__5_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v2
; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v2
+; GFX90A-NEXT: v_mov_b32_e32 v7, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5958,14 +5666,13 @@ define void @v_shuffle_v4p0_v3p0__5_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v2
; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, v2
+; GFX942-NEXT: v_mov_b32_e32 v7, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -5986,10 +5693,7 @@ define void @v_shuffle_v4p0_v3p0__5_u_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: v_mov_b32_e32 v0, v2
; GFX900-NEXT: v_mov_b32_e32 v1, v3
; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6003,10 +5707,7 @@ define void @v_shuffle_v4p0_v3p0__5_u_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: v_mov_b32_e32 v0, v2
; GFX90A-NEXT: v_mov_b32_e32 v1, v3
; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6020,10 +5721,7 @@ define void @v_shuffle_v4p0_v3p0__5_u_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: v_mov_b32_e32 v0, v2
; GFX942-NEXT: v_mov_b32_e32 v1, v3
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -6040,19 +5738,14 @@ define void @v_shuffle_v4p0_v3p0__5_0_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v0, v2
+; GFX900-NEXT: v_mov_b32_e32 v1, v3
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:7]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6062,19 +5755,14 @@ define void @v_shuffle_v4p0_v3p0__5_0_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v2
+; GFX90A-NEXT: v_mov_b32_e32 v1, v3
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:7]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6084,20 +5772,14 @@ define void @v_shuffle_v4p0_v3p0__5_0_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v2
+; GFX942-NEXT: v_mov_b32_e32 v1, v3
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:7]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -6112,18 +5794,16 @@ define void @v_shuffle_v4p0_v3p0__5_1_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:9]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: v_mov_b32_e32 v0, v8
-; GFX900-NEXT: v_mov_b32_e32 v1, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, v2
+; GFX900-NEXT: v_mov_b32_e32 v1, v3
+; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6131,18 +5811,16 @@ define void @v_shuffle_v4p0_v3p0__5_1_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:9]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: v_mov_b32_e32 v0, v8
-; GFX90A-NEXT: v_mov_b32_e32 v1, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v2
+; GFX90A-NEXT: v_mov_b32_e32 v1, v3
+; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6150,19 +5828,17 @@ define void @v_shuffle_v4p0_v3p0__5_1_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:9]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: v_mov_b32_e32 v0, v8
-; GFX942-NEXT: v_mov_b32_e32 v1, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v2
+; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -6177,18 +5853,16 @@ define void @v_shuffle_v4p0_v3p0__5_2_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[2:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:11]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v8
-; GFX900-NEXT: v_mov_b32_e32 v7, v9
-; GFX900-NEXT: v_mov_b32_e32 v2, v10
-; GFX900-NEXT: v_mov_b32_e32 v3, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v0, v2
+; GFX900-NEXT: v_mov_b32_e32 v1, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6196,18 +5870,16 @@ define void @v_shuffle_v4p0_v3p0__5_2_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[2:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:11]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v8
-; GFX90A-NEXT: v_mov_b32_e32 v7, v9
-; GFX90A-NEXT: v_mov_b32_e32 v2, v10
-; GFX90A-NEXT: v_mov_b32_e32 v3, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v2
+; GFX90A-NEXT: v_mov_b32_e32 v1, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6215,18 +5887,17 @@ define void @v_shuffle_v4p0_v3p0__5_2_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:11]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v8
-; GFX942-NEXT: v_mov_b32_e32 v7, v9
-; GFX942-NEXT: v_mov_b32_e32 v2, v10
-; GFX942-NEXT: v_mov_b32_e32 v3, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v2
+; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -6240,60 +5911,56 @@ define void @v_shuffle_v4p0_v3p0__5_3_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v3p0__5_3_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: v_mov_b32_e32 v6, v2
; GFX900-NEXT: v_mov_b32_e32 v7, v3
; GFX900-NEXT: v_mov_b32_e32 v8, v2
; GFX900-NEXT: v_mov_b32_e32 v9, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: v_mov_b32_e32 v7, v1
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v3p0__5_3_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v2
-; GFX90A-NEXT: v_mov_b32_e32 v9, v3
; GFX90A-NEXT: v_mov_b32_e32 v10, v2
; GFX90A-NEXT: v_mov_b32_e32 v11, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[8:11], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v12, v2
+; GFX90A-NEXT: v_mov_b32_e32 v13, v3
+; GFX90A-NEXT: v_mov_b32_e32 v6, v0
+; GFX90A-NEXT: v_mov_b32_e32 v7, v1
+; GFX90A-NEXT: global_store_dwordx4 v8, v[10:13], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_3_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v2
-; GFX942-NEXT: v_mov_b32_e32 v9, v3
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_mov_b32_e32 v10, v2
; GFX942-NEXT: v_mov_b32_e32 v11, v3
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[8:11], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v12, v2
+; GFX942-NEXT: v_mov_b32_e32 v13, v3
+; GFX942-NEXT: v_mov_b32_e32 v6, v0
+; GFX942-NEXT: v_mov_b32_e32 v7, v1
+; GFX942-NEXT: global_store_dwordx4 v8, v[10:13], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -6310,14 +5977,13 @@ define void @v_shuffle_v4p0_v3p0__5_5_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v2
; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6327,14 +5993,13 @@ define void @v_shuffle_v4p0_v3p0__5_5_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v2
; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6344,14 +6009,13 @@ define void @v_shuffle_v4p0_v3p0__5_5_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v2
; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -6365,45 +6029,42 @@ define void @v_shuffle_v4p0_v3p0__5_5_u_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v3p0__5_5_u_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v3p0__5_5_u_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_u_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -6418,18 +6079,19 @@ define void @v_shuffle_v4p0_v3p0__5_5_0_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:7]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v2
+; GFX900-NEXT: v_mov_b32_e32 v9, v3
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6437,18 +6099,19 @@ define void @v_shuffle_v4p0_v3p0__5_5_0_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:7]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v2
+; GFX90A-NEXT: v_mov_b32_e32 v9, v3
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6456,19 +6119,20 @@ define void @v_shuffle_v4p0_v3p0__5_5_0_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:7]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v2
+; GFX942-NEXT: v_mov_b32_e32 v9, v3
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -6483,18 +6147,19 @@ define void @v_shuffle_v4p0_v3p0__5_5_1_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: v_mov_b32_e32 v6, v8
-; GFX900-NEXT: v_mov_b32_e32 v7, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, v2
+; GFX900-NEXT: v_mov_b32_e32 v9, v3
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6502,18 +6167,19 @@ define void @v_shuffle_v4p0_v3p0__5_5_1_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: v_mov_b32_e32 v6, v8
-; GFX90A-NEXT: v_mov_b32_e32 v7, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, v2
+; GFX90A-NEXT: v_mov_b32_e32 v9, v3
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6521,19 +6187,20 @@ define void @v_shuffle_v4p0_v3p0__5_5_1_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:9]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: v_mov_b32_e32 v6, v8
-; GFX942-NEXT: v_mov_b32_e32 v7, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v2
+; GFX942-NEXT: v_mov_b32_e32 v9, v3
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -6550,17 +6217,16 @@ define void @v_shuffle_v4p0_v3p0__5_5_2_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v8
+; GFX900-NEXT: v_mov_b32_e32 v7, v9
+; GFX900-NEXT: v_mov_b32_e32 v12, v10
+; GFX900-NEXT: v_mov_b32_e32 v13, v11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, v4
-; GFX900-NEXT: v_mov_b32_e32 v7, v5
-; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v8, v10
-; GFX900-NEXT: v_mov_b32_e32 v9, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6570,17 +6236,16 @@ define void @v_shuffle_v4p0_v3p0__5_5_2_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, v8
+; GFX90A-NEXT: v_mov_b32_e32 v7, v9
+; GFX90A-NEXT: v_mov_b32_e32 v12, v10
+; GFX90A-NEXT: v_mov_b32_e32 v13, v11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v5
-; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v10
-; GFX90A-NEXT: v_mov_b32_e32 v9, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6590,18 +6255,16 @@ define void @v_shuffle_v4p0_v3p0__5_5_2_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v8
+; GFX942-NEXT: v_mov_b32_e32 v7, v9
+; GFX942-NEXT: v_mov_b32_e32 v12, v10
+; GFX942-NEXT: v_mov_b32_e32 v13, v11
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v5
-; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v8, v10
-; GFX942-NEXT: v_mov_b32_e32 v9, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -6615,45 +6278,42 @@ define void @v_shuffle_v4p0_v3p0__5_5_3_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v3p0__5_5_3_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v3p0__5_5_3_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_3_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -6667,42 +6327,43 @@ define void @v_shuffle_v4p0_v3p0__u_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v3p0__u_5_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v3p0__u_5_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v3p0__u_5_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -6717,18 +6378,18 @@ define void @v_shuffle_v4p0_v3p0__0_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[8:13]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:7]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: v_mov_b32_e32 v10, v4
+; GFX900-NEXT: v_mov_b32_e32 v11, v5
+; GFX900-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6736,18 +6397,18 @@ define void @v_shuffle_v4p0_v3p0__0_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[8:13]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:7]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: v_mov_b32_e32 v10, v4
+; GFX90A-NEXT: v_mov_b32_e32 v11, v5
+; GFX90A-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6755,19 +6416,19 @@ define void @v_shuffle_v4p0_v3p0__0_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[8:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:7]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: v_mov_b32_e32 v10, v4
+; GFX942-NEXT: v_mov_b32_e32 v11, v5
+; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -6782,18 +6443,18 @@ define void @v_shuffle_v4p0_v3p0__1_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:9]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v8
-; GFX900-NEXT: v_mov_b32_e32 v7, v9
-; GFX900-NEXT: v_mov_b32_e32 v4, v8
-; GFX900-NEXT: v_mov_b32_e32 v5, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: v_mov_b32_e32 v10, v4
+; GFX900-NEXT: v_mov_b32_e32 v11, v5
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6801,18 +6462,18 @@ define void @v_shuffle_v4p0_v3p0__1_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:9]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v8
-; GFX90A-NEXT: v_mov_b32_e32 v7, v9
-; GFX90A-NEXT: v_mov_b32_e32 v4, v8
-; GFX90A-NEXT: v_mov_b32_e32 v5, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: v_mov_b32_e32 v10, v4
+; GFX90A-NEXT: v_mov_b32_e32 v11, v5
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6820,19 +6481,19 @@ define void @v_shuffle_v4p0_v3p0__1_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:9]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v8
-; GFX942-NEXT: v_mov_b32_e32 v7, v9
-; GFX942-NEXT: v_mov_b32_e32 v4, v8
-; GFX942-NEXT: v_mov_b32_e32 v5, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: v_mov_b32_e32 v10, v4
+; GFX942-NEXT: v_mov_b32_e32 v11, v5
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -6849,17 +6510,16 @@ define void @v_shuffle_v4p0_v3p0__2_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v8, v10
-; GFX900-NEXT: v_mov_b32_e32 v9, v11
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
+; GFX900-NEXT: v_mov_b32_e32 v12, v10
+; GFX900-NEXT: v_mov_b32_e32 v13, v11
+; GFX900-NEXT: v_mov_b32_e32 v6, v10
+; GFX900-NEXT: v_mov_b32_e32 v7, v11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v8, v4
-; GFX900-NEXT: v_mov_b32_e32 v9, v5
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6869,17 +6529,16 @@ define void @v_shuffle_v4p0_v3p0__2_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v10
-; GFX90A-NEXT: v_mov_b32_e32 v9, v11
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
+; GFX90A-NEXT: v_mov_b32_e32 v12, v10
+; GFX90A-NEXT: v_mov_b32_e32 v13, v11
+; GFX90A-NEXT: v_mov_b32_e32 v6, v10
+; GFX90A-NEXT: v_mov_b32_e32 v7, v11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v4
-; GFX90A-NEXT: v_mov_b32_e32 v9, v5
-; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6889,17 +6548,16 @@ define void @v_shuffle_v4p0_v3p0__2_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v10
-; GFX942-NEXT: v_mov_b32_e32 v9, v11
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, v10
+; GFX942-NEXT: v_mov_b32_e32 v13, v11
+; GFX942-NEXT: v_mov_b32_e32 v6, v10
+; GFX942-NEXT: v_mov_b32_e32 v7, v11
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v8, v4
-; GFX942-NEXT: v_mov_b32_e32 v9, v5
-; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -6916,11 +6574,13 @@ define void @v_shuffle_v4p0_v3p0__3_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
; GFX900-NEXT: v_mov_b32_e32 v2, v4
; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6930,11 +6590,13 @@ define void @v_shuffle_v4p0_v3p0__3_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
; GFX90A-NEXT: v_mov_b32_e32 v2, v4
; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6944,11 +6606,13 @@ define void @v_shuffle_v4p0_v3p0__3_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
; GFX942-NEXT: v_mov_b32_e32 v2, v4
; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -6962,49 +6626,43 @@ define void @v_shuffle_v4p0_v3p0__4_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v3p0__4_5_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v6, v4
; GFX900-NEXT: v_mov_b32_e32 v7, v5
-; GFX900-NEXT: v_mov_b32_e32 v8, v4
-; GFX900-NEXT: v_mov_b32_e32 v9, v5
-; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v3p0__4_5_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, v4
-; GFX90A-NEXT: v_mov_b32_e32 v9, v5
-; GFX90A-NEXT: v_mov_b32_e32 v10, v4
-; GFX90A-NEXT: v_mov_b32_e32 v11, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[8:11], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v3p0__4_5_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v4
-; GFX942-NEXT: v_mov_b32_e32 v9, v5
-; GFX942-NEXT: v_mov_b32_e32 v10, v4
-; GFX942-NEXT: v_mov_b32_e32 v11, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[8:11], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -7018,48 +6676,43 @@ define void @v_shuffle_v4p0_v3p0__5_u_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v3p0__5_u_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v3p0__5_u_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_u_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -7073,64 +6726,61 @@ define void @v_shuffle_v4p0_v3p0__5_0_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v3p0__5_0_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:7]
+; GFX900-NEXT: ; def v[8:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v8
+; GFX900-NEXT: v_mov_b32_e32 v7, v9
+; GFX900-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v3p0__5_0_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:7]
+; GFX90A-NEXT: ; def v[8:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, v8
+; GFX90A-NEXT: v_mov_b32_e32 v7, v9
+; GFX90A-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_0_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:7]
+; GFX942-NEXT: ; def v[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v6, v8
+; GFX942-NEXT: v_mov_b32_e32 v7, v9
+; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -7145,18 +6795,19 @@ define void @v_shuffle_v4p0_v3p0__5_1_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:9]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
; GFX900-NEXT: v_mov_b32_e32 v6, v8
; GFX900-NEXT: v_mov_b32_e32 v7, v9
-; GFX900-NEXT: v_mov_b32_e32 v0, v8
-; GFX900-NEXT: v_mov_b32_e32 v1, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7164,18 +6815,19 @@ define void @v_shuffle_v4p0_v3p0__5_1_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:9]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
; GFX90A-NEXT: v_mov_b32_e32 v6, v8
; GFX90A-NEXT: v_mov_b32_e32 v7, v9
-; GFX90A-NEXT: v_mov_b32_e32 v0, v8
-; GFX90A-NEXT: v_mov_b32_e32 v1, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7183,19 +6835,20 @@ define void @v_shuffle_v4p0_v3p0__5_1_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:9]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_mov_b32_e32 v6, v8
; GFX942-NEXT: v_mov_b32_e32 v7, v9
-; GFX942-NEXT: v_mov_b32_e32 v0, v8
-; GFX942-NEXT: v_mov_b32_e32 v1, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -7210,18 +6863,19 @@ define void @v_shuffle_v4p0_v3p0__5_2_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[4:9]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:11]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v8, v10
-; GFX900-NEXT: v_mov_b32_e32 v9, v11
-; GFX900-NEXT: v_mov_b32_e32 v2, v10
-; GFX900-NEXT: v_mov_b32_e32 v3, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v8
+; GFX900-NEXT: v_mov_b32_e32 v7, v9
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7229,18 +6883,19 @@ define void @v_shuffle_v4p0_v3p0__5_2_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[4:9]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:11]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v10
-; GFX90A-NEXT: v_mov_b32_e32 v9, v11
-; GFX90A-NEXT: v_mov_b32_e32 v2, v10
-; GFX90A-NEXT: v_mov_b32_e32 v3, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, v8
+; GFX90A-NEXT: v_mov_b32_e32 v7, v9
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7248,18 +6903,20 @@ define void @v_shuffle_v4p0_v3p0__5_2_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:11]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v10
-; GFX942-NEXT: v_mov_b32_e32 v9, v11
-; GFX942-NEXT: v_mov_b32_e32 v2, v10
-; GFX942-NEXT: v_mov_b32_e32 v3, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v6, v8
+; GFX942-NEXT: v_mov_b32_e32 v7, v9
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -7273,51 +6930,52 @@ define void @v_shuffle_v4p0_v3p0__5_3_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v3p0__5_3_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: v_mov_b32_e32 v7, v1
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v3p0__5_3_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
-; GFX90A-NEXT: s_waitcnt vmcnt(0)
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v0
+; GFX90A-NEXT: v_mov_b32_e32 v7, v1
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_3_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, v0
+; GFX942-NEXT: v_mov_b32_e32 v7, v1
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -7331,54 +6989,52 @@ define void @v_shuffle_v4p0_v3p0__5_4_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v3p0__5_4_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: v_mov_b32_e32 v6, v4
; GFX900-NEXT: v_mov_b32_e32 v7, v5
-; GFX900-NEXT: v_mov_b32_e32 v8, v4
-; GFX900-NEXT: v_mov_b32_e32 v9, v5
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: v_mov_b32_e32 v7, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v3p0__5_4_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v4
-; GFX90A-NEXT: v_mov_b32_e32 v9, v5
-; GFX90A-NEXT: v_mov_b32_e32 v10, v4
-; GFX90A-NEXT: v_mov_b32_e32 v11, v5
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[8:11], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, v2
+; GFX90A-NEXT: v_mov_b32_e32 v7, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_4_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v4
-; GFX942-NEXT: v_mov_b32_e32 v9, v5
-; GFX942-NEXT: v_mov_b32_e32 v10, v4
-; GFX942-NEXT: v_mov_b32_e32 v11, v5
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[8:11], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v6, v2
+; GFX942-NEXT: v_mov_b32_e32 v7, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -7392,45 +7048,42 @@ define void @v_shuffle_v4p0_v3p0__5_5_u_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v3p0__5_5_u_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v3p0__5_5_u_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_u_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -7445,18 +7098,19 @@ define void @v_shuffle_v4p0_v3p0__5_5_0_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:7]
+; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v4
+; GFX900-NEXT: v_mov_b32_e32 v9, v5
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7464,18 +7118,19 @@ define void @v_shuffle_v4p0_v3p0__5_5_0_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:7]
+; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v4
+; GFX90A-NEXT: v_mov_b32_e32 v9, v5
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7483,19 +7138,20 @@ define void @v_shuffle_v4p0_v3p0__5_5_0_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:7]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v4
+; GFX942-NEXT: v_mov_b32_e32 v9, v5
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -7510,18 +7166,19 @@ define void @v_shuffle_v4p0_v3p0__5_5_1_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:5]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v8
-; GFX900-NEXT: v_mov_b32_e32 v5, v9
-; GFX900-NEXT: v_mov_b32_e32 v6, v8
-; GFX900-NEXT: v_mov_b32_e32 v7, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:5]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, v4
+; GFX900-NEXT: v_mov_b32_e32 v9, v5
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7529,18 +7186,19 @@ define void @v_shuffle_v4p0_v3p0__5_5_1_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:5]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v8
-; GFX90A-NEXT: v_mov_b32_e32 v5, v9
-; GFX90A-NEXT: v_mov_b32_e32 v6, v8
-; GFX90A-NEXT: v_mov_b32_e32 v7, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[0:5]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, v4
+; GFX90A-NEXT: v_mov_b32_e32 v9, v5
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7548,19 +7206,20 @@ define void @v_shuffle_v4p0_v3p0__5_5_1_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:5]
+; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:9]
+; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v8
-; GFX942-NEXT: v_mov_b32_e32 v5, v9
-; GFX942-NEXT: v_mov_b32_e32 v6, v8
-; GFX942-NEXT: v_mov_b32_e32 v7, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v4
+; GFX942-NEXT: v_mov_b32_e32 v9, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -7577,17 +7236,16 @@ define void @v_shuffle_v4p0_v3p0__5_5_2_5(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[6:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v10
+; GFX900-NEXT: v_mov_b32_e32 v7, v11
+; GFX900-NEXT: v_mov_b32_e32 v12, v10
+; GFX900-NEXT: v_mov_b32_e32 v13, v11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, v4
-; GFX900-NEXT: v_mov_b32_e32 v9, v5
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v8, v10
-; GFX900-NEXT: v_mov_b32_e32 v9, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7597,17 +7255,16 @@ define void @v_shuffle_v4p0_v3p0__5_5_2_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[6:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, v10
+; GFX90A-NEXT: v_mov_b32_e32 v7, v11
+; GFX90A-NEXT: v_mov_b32_e32 v12, v10
+; GFX90A-NEXT: v_mov_b32_e32 v13, v11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, v4
-; GFX90A-NEXT: v_mov_b32_e32 v9, v5
-; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v10
-; GFX90A-NEXT: v_mov_b32_e32 v9, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7617,18 +7274,16 @@ define void @v_shuffle_v4p0_v3p0__5_5_2_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, v10
+; GFX942-NEXT: v_mov_b32_e32 v7, v11
+; GFX942-NEXT: v_mov_b32_e32 v12, v10
+; GFX942-NEXT: v_mov_b32_e32 v13, v11
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v4
-; GFX942-NEXT: v_mov_b32_e32 v9, v5
-; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v8, v10
-; GFX942-NEXT: v_mov_b32_e32 v9, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -7645,11 +7300,13 @@ define void @v_shuffle_v4p0_v3p0__5_5_3_5(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v4
; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7659,11 +7316,13 @@ define void @v_shuffle_v4p0_v3p0__5_5_3_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v4
; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7673,11 +7332,13 @@ define void @v_shuffle_v4p0_v3p0__5_5_3_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v4
; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -7691,45 +7352,42 @@ define void @v_shuffle_v4p0_v3p0__5_5_4_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v3p0__5_5_4_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
+; GFX900-NEXT: v_mov_b32_e32 v7, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v3p0__5_5_4_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, v4
+; GFX90A-NEXT: v_mov_b32_e32 v7, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_4_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v6, v4
+; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=v"()
@@ -7865,10 +7523,9 @@ define void @s_shuffle_v4p0_v3p0__2_u_u_u() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -7966,10 +7623,9 @@ define void @s_shuffle_v4p0_v3p0__5_u_u_u() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -7986,15 +7642,13 @@ define void @s_shuffle_v4p0_v3p0__5_0_u_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -8004,15 +7658,13 @@ define void @s_shuffle_v4p0_v3p0__5_0_u_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -8075,11 +7727,11 @@ define void @s_shuffle_v4p0_v3p0__5_1_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8130,13 +7782,11 @@ define void @s_shuffle_v4p0_v3p0__5_2_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s10, s12
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b32 s11, s13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8149,46 +7799,18 @@ define void @s_shuffle_v4p0_v3p0__5_2_u_u() {
}
define void @s_shuffle_v4p0_v3p0__5_3_u_u() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_3_u_u:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_3_u_u:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_3_u_u:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_3_u_u:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s4
+; GFX9-NEXT: s_mov_b32 s11, s5
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 3, i32 poison, i32 poison>
@@ -8201,10 +7823,10 @@ define void @s_shuffle_v4p0_v3p0__5_4_u_u() {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
+; GFX9-NEXT: ; def s[4:9]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
+; GFX9-NEXT: s_mov_b32 s10, s6
+; GFX9-NEXT: s_mov_b32 s11, s7
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:15]
; GFX9-NEXT: ;;#ASMEND
@@ -8217,50 +7839,18 @@ define void @s_shuffle_v4p0_v3p0__5_4_u_u() {
}
define void @s_shuffle_v4p0_v3p0__5_5_u_u() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_5_u_u:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_5_u_u:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_u_u:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_5_u_u:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 poison>
@@ -8269,65 +7859,21 @@ define void @s_shuffle_v4p0_v3p0__5_5_u_u() {
}
define void @s_shuffle_v4p0_v3p0__5_5_0_u() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_5_0_u:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_5_0_u:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_0_u:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_5_0_u:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 poison>
@@ -8340,17 +7886,15 @@ define void @s_shuffle_v4p0_v3p0__5_5_1_u() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -8360,17 +7904,15 @@ define void @s_shuffle_v4p0_v3p0__5_5_1_u() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -8380,16 +7922,14 @@ define void @s_shuffle_v4p0_v3p0__5_5_1_u() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:9]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s10, s8
+; GFX942-NEXT: s_mov_b32 s11, s9
; GFX942-NEXT: s_mov_b32 s13, s3
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
@@ -8410,12 +7950,10 @@ define void @s_shuffle_v4p0_v3p0__5_5_2_u() {
; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:21]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s20
-; GFX900-NEXT: s_mov_b32 s9, s21
-; GFX900-NEXT: s_mov_b32 s10, s20
-; GFX900-NEXT: s_mov_b32 s11, s21
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -8428,12 +7966,10 @@ define void @s_shuffle_v4p0_v3p0__5_5_2_u() {
; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:21]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s20
-; GFX90A-NEXT: s_mov_b32 s9, s21
-; GFX90A-NEXT: s_mov_b32 s10, s20
-; GFX90A-NEXT: s_mov_b32 s11, s21
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -8445,13 +7981,12 @@ define void @s_shuffle_v4p0_v3p0__5_5_2_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b32 s10, s8
+; GFX942-NEXT: s_mov_b32 s11, s9
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8464,52 +7999,20 @@ define void @s_shuffle_v4p0_v3p0__5_5_2_u() {
}
define void @s_shuffle_v4p0_v3p0__5_5_3_u() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_5_3_u:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_5_3_u:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_3_u:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_5_3_u:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s12, s4
+; GFX9-NEXT: s_mov_b32 s13, s5
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 poison>
@@ -8518,56 +8021,20 @@ define void @s_shuffle_v4p0_v3p0__5_5_3_u() {
}
define void @s_shuffle_v4p0_v3p0__5_5_4_u() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_5_4_u:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_5_4_u:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_4_u:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_5_4_u:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s12, s6
+; GFX9-NEXT: s_mov_b32 s13, s7
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 poison>
@@ -8580,12 +8047,12 @@ define void @s_shuffle_v4p0_v3p0__5_5_5_u() {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
+; GFX9-NEXT: ; def s[4:9]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s12, s8
+; GFX9-NEXT: s_mov_b32 s13, s9
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:15]
; GFX9-NEXT: ;;#ASMEND
@@ -8602,17 +8069,17 @@ define void @s_shuffle_v4p0_v3p0__5_5_5_0() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[16:21]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: s_mov_b32 s12, s8
+; GFX900-NEXT: s_mov_b32 s13, s9
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -8622,17 +8089,17 @@ define void @s_shuffle_v4p0_v3p0__5_5_5_0() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[16:21]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: s_mov_b32 s12, s8
+; GFX90A-NEXT: s_mov_b32 s13, s9
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -8642,16 +8109,16 @@ define void @s_shuffle_v4p0_v3p0__5_5_5_0() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
; GFX942-NEXT: s_mov_b32 s14, s0
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:9]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s10, s8
+; GFX942-NEXT: s_mov_b32 s11, s9
+; GFX942-NEXT: s_mov_b32 s12, s8
+; GFX942-NEXT: s_mov_b32 s13, s9
; GFX942-NEXT: s_mov_b32 s15, s1
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
@@ -8665,65 +8132,23 @@ define void @s_shuffle_v4p0_v3p0__5_5_5_0() {
}
define void @s_shuffle_v4p0_v3p0__5_5_5_1() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_5_5_1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_5_5_1:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_5_1:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_5_5_1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s12, s8
+; GFX9-NEXT: s_mov_b32 s13, s9
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 1>
@@ -8732,65 +8157,25 @@ define void @s_shuffle_v4p0_v3p0__5_5_5_1() {
}
define void @s_shuffle_v4p0_v3p0__5_5_5_2() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_5_5_2:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_5_5_2:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_5_2:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_5_5_2:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s12, s8
+; GFX9-NEXT: s_mov_b32 s13, s9
+; GFX9-NEXT: s_mov_b32 s14, s16
+; GFX9-NEXT: s_mov_b32 s15, s17
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 2>
@@ -8799,62 +8184,22 @@ define void @s_shuffle_v4p0_v3p0__5_5_5_2() {
}
define void @s_shuffle_v4p0_v3p0__5_5_5_3() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_5_5_3:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:21]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s20
-; GFX900-NEXT: s_mov_b32 s9, s21
-; GFX900-NEXT: s_mov_b32 s10, s20
-; GFX900-NEXT: s_mov_b32 s11, s21
-; GFX900-NEXT: s_mov_b32 s12, s20
-; GFX900-NEXT: s_mov_b32 s13, s21
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_5_5_3:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:21]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s20
-; GFX90A-NEXT: s_mov_b32 s9, s21
-; GFX90A-NEXT: s_mov_b32 s10, s20
-; GFX90A-NEXT: s_mov_b32 s11, s21
-; GFX90A-NEXT: s_mov_b32 s12, s20
-; GFX90A-NEXT: s_mov_b32 s13, s21
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_5_3:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_5_5_3:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s12, s8
+; GFX9-NEXT: s_mov_b32 s13, s9
+; GFX9-NEXT: s_mov_b32 s14, s4
+; GFX9-NEXT: s_mov_b32 s15, s5
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 3>
@@ -8863,58 +8208,22 @@ define void @s_shuffle_v4p0_v3p0__5_5_5_3() {
}
define void @s_shuffle_v4p0_v3p0__5_5_5_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_5_5_4:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_5_5_4:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_5_4:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_5_5_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s12, s8
+; GFX9-NEXT: s_mov_b32 s13, s9
+; GFX9-NEXT: s_mov_b32 s14, s6
+; GFX9-NEXT: s_mov_b32 s15, s7
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 4>
@@ -8927,14 +8236,14 @@ define void @s_shuffle_v4p0_v3p0__5_5_5_5() {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
+; GFX9-NEXT: ; def s[4:9]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s12, s8
+; GFX9-NEXT: s_mov_b32 s13, s9
+; GFX9-NEXT: s_mov_b32 s14, s8
+; GFX9-NEXT: s_mov_b32 s15, s9
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:15]
; GFX9-NEXT: ;;#ASMEND
@@ -8947,56 +8256,20 @@ define void @s_shuffle_v4p0_v3p0__5_5_5_5() {
}
define void @s_shuffle_v4p0_v3p0__u_0_0_0() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__u_0_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__u_0_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__u_0_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__u_0_0_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: s_mov_b32 s14, s12
+; GFX9-NEXT: s_mov_b32 s15, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 poison, i32 0, i32 0, i32 0>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
@@ -9027,172 +8300,427 @@ define void @s_shuffle_v4p0_v3p0__0_0_0_0() {
}
define void @s_shuffle_v4p0_v3p0__1_0_0_0() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__1_0_0_0:
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__1_0_0_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s14
+; GFX9-NEXT: s_mov_b32 s9, s15
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: s_mov_b32 s14, s12
+; GFX9-NEXT: s_mov_b32 s15, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v3p0__2_0_0_0() {
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__2_0_0_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s4
+; GFX9-NEXT: s_mov_b32 s11, s5
+; GFX9-NEXT: s_mov_b32 s12, s4
+; GFX9-NEXT: s_mov_b32 s13, s5
+; GFX9-NEXT: s_mov_b32 s14, s4
+; GFX9-NEXT: s_mov_b32 s15, s5
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 2, i32 0, i32 0, i32 0>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v3p0__3_0_0_0() {
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__3_0_0_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: s_mov_b32 s14, s12
+; GFX9-NEXT: s_mov_b32 s15, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 3, i32 0, i32 0, i32 0>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v3p0__4_0_0_0() {
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__4_0_0_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:17]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s6
; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__1_0_0_0:
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__4_0_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:17]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s6
; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__1_0_0_0:
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__4_0_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:17]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_mov_b32 s8, s2
; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: s_mov_b32 s10, s12
+; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b32 s14, s12
+; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
+ %vec1 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 4, i32 0, i32 0, i32 0>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v3p0__2_0_0_0() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__2_0_0_0:
+define void @s_shuffle_v4p0_v3p0__5_0_0_0() {
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_0_0_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: s_mov_b32 s14, s12
+; GFX9-NEXT: s_mov_b32 s15, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 0, i32 0, i32 0>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v3p0__5_u_0_0() {
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_u_0_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s14, s12
+; GFX9-NEXT: s_mov_b32 s15, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 poison, i32 0, i32 0>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v3p0__5_1_0_0() {
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_1_0_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s14
+; GFX9-NEXT: s_mov_b32 s11, s15
+; GFX9-NEXT: s_mov_b32 s14, s12
+; GFX9-NEXT: s_mov_b32 s15, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 1, i32 0, i32 0>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v3p0__5_2_0_0() {
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_2_0_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s16
+; GFX9-NEXT: s_mov_b32 s11, s17
+; GFX9-NEXT: s_mov_b32 s14, s12
+; GFX9-NEXT: s_mov_b32 s15, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 2, i32 0, i32 0>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v3p0__5_3_0_0() {
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_3_0_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s4
+; GFX9-NEXT: s_mov_b32 s11, s5
+; GFX9-NEXT: s_mov_b32 s14, s12
+; GFX9-NEXT: s_mov_b32 s15, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 3, i32 0, i32 0>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v3p0__5_4_0_0() {
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_4_0_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s6
+; GFX9-NEXT: s_mov_b32 s11, s7
+; GFX9-NEXT: s_mov_b32 s14, s12
+; GFX9-NEXT: s_mov_b32 s15, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 4, i32 0, i32 0>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v3p0__5_5_0_0() {
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_5_0_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s14, s12
+; GFX9-NEXT: s_mov_b32 s15, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 0>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v3p0__5_5_u_0() {
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_5_u_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:17]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__2_0_0_0:
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_5_u_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:17]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__2_0_0_0:
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_u_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
; GFX942-NEXT: s_mov_b32 s14, s0
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:9]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s10, s8
+; GFX942-NEXT: s_mov_b32 s11, s9
; GFX942-NEXT: s_mov_b32 s15, s1
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 2, i32 0, i32 0, i32 0>
+ %vec1 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 0>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v3p0__3_0_0_0() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__3_0_0_0:
+define void @s_shuffle_v4p0_v3p0__5_5_1_0() {
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_5_1_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[16:21]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: s_mov_b32 s12, s18
+; GFX900-NEXT: s_mov_b32 s13, s19
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__3_0_0_0:
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_5_1_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[16:21]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: s_mov_b32 s12, s18
+; GFX90A-NEXT: s_mov_b32 s13, s19
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__3_0_0_0:
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_1_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:9]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s10, s8
+; GFX942-NEXT: s_mov_b32 s11, s9
+; GFX942-NEXT: s_mov_b32 s13, s3
; GFX942-NEXT: s_mov_b32 s14, s0
; GFX942-NEXT: s_mov_b32 s15, s1
; GFX942-NEXT: ;;#ASMSTART
@@ -9200,72 +8728,94 @@ define void @s_shuffle_v4p0_v3p0__3_0_0_0() {
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 3, i32 0, i32 0, i32 0>
+ %vec1 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 0>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v3p0__4_0_0_0() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__4_0_0_0:
+define void @s_shuffle_v4p0_v3p0__5_5_2_0() {
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_5_2_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[16:21]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s12, s20
+; GFX9-NEXT: s_mov_b32 s13, s21
+; GFX9-NEXT: s_mov_b32 s14, s16
+; GFX9-NEXT: s_mov_b32 s15, s17
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 0>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v3p0__5_5_3_0() {
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_5_3_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[16:21]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
; GFX900-NEXT: s_mov_b32 s12, s4
; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__4_0_0_0:
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_5_3_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[16:21]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
; GFX90A-NEXT: s_mov_b32 s12, s4
; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__4_0_0_0:
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_3_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b32 s14, s0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
+; GFX942-NEXT: s_mov_b32 s10, s8
+; GFX942-NEXT: s_mov_b32 s11, s9
+; GFX942-NEXT: s_mov_b32 s12, s4
+; GFX942-NEXT: s_mov_b32 s13, s5
; GFX942-NEXT: s_mov_b32 s15, s1
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
@@ -9273,70 +8823,66 @@ define void @s_shuffle_v4p0_v3p0__4_0_0_0() {
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 4, i32 0, i32 0, i32 0>
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 0>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v3p0__5_0_0_0() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_0_0_0:
+define void @s_shuffle_v4p0_v3p0__5_5_4_0() {
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_5_4_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[16:21]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_0_0_0:
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_5_4_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[16:21]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_0_0_0:
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_4_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b32 s14, s0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
+; GFX942-NEXT: s_mov_b32 s10, s8
+; GFX942-NEXT: s_mov_b32 s11, s9
+; GFX942-NEXT: s_mov_b32 s12, s6
+; GFX942-NEXT: s_mov_b32 s13, s7
; GFX942-NEXT: s_mov_b32 s15, s1
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
@@ -9344,3422 +8890,640 @@ define void @s_shuffle_v4p0_v3p0__5_0_0_0() {
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 0, i32 0, i32 0>
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 0>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v3p0__5_u_0_0() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_u_0_0:
+define void @s_shuffle_v4p0_v3p0__u_1_1_1() {
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__u_1_1_1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: s_mov_b32 s14, s10
+; GFX9-NEXT: s_mov_b32 s15, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 poison, i32 1, i32 1, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v3p0__0_1_1_1() {
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__0_1_1_1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: s_mov_b32 s14, s10
+; GFX9-NEXT: s_mov_b32 s15, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v3p0__1_1_1_1() {
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__1_1_1_1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: s_mov_b32 s14, s10
+; GFX9-NEXT: s_mov_b32 s15, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v3p0__2_1_1_1() {
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__2_1_1_1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s6
+; GFX9-NEXT: s_mov_b32 s11, s7
+; GFX9-NEXT: s_mov_b32 s12, s6
+; GFX9-NEXT: s_mov_b32 s13, s7
+; GFX9-NEXT: s_mov_b32 s14, s6
+; GFX9-NEXT: s_mov_b32 s15, s7
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 2, i32 1, i32 1, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v3p0__3_1_1_1() {
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__3_1_1_1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: s_mov_b32 s14, s10
+; GFX9-NEXT: s_mov_b32 s15, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 3, i32 1, i32 1, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v3p0__4_1_1_1() {
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__4_1_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_u_0_0:
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__4_1_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_u_0_0:
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__4_1_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:9]
+; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b32 s8, s2
+; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b32 s12, s10
+; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b32 s14, s10
+; GFX942-NEXT: s_mov_b32 s15, s11
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 poison, i32 0, i32 0>
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 4, i32 1, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v3p0__5_1_0_0() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_1_0_0:
+define void @s_shuffle_v4p0_v3p0__5_1_1_1() {
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_1_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s6
-; GFX900-NEXT: s_mov_b32 s11, s7
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_1_0_0:
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_1_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s6
-; GFX90A-NEXT: s_mov_b32 s11, s7
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_1_0_0:
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_1_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s2
+; GFX942-NEXT: s_mov_b32 s12, s10
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b32 s14, s10
+; GFX942-NEXT: s_mov_b32 s15, s11
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 1, i32 0, i32 0>
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 1, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v3p0__5_2_0_0() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_2_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:21]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s20
-; GFX900-NEXT: s_mov_b32 s11, s21
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_2_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:21]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s20
-; GFX90A-NEXT: s_mov_b32 s11, s21
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_2_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+define void @s_shuffle_v4p0_v3p0__5_u_1_1() {
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_u_1_1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s12, s14
+; GFX9-NEXT: s_mov_b32 s13, s15
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 2, i32 0, i32 0>
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 poison, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v3p0__5_3_0_0() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_3_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_3_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_3_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:9]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+define void @s_shuffle_v4p0_v3p0__5_0_1_1() {
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_0_1_1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: s_mov_b32 s12, s14
+; GFX9-NEXT: s_mov_b32 s13, s15
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 3, i32 0, i32 0>
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 0, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v3p0__5_4_0_0() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_4_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_4_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_4_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+define void @s_shuffle_v4p0_v3p0__5_2_1_1() {
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_2_1_1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s16
+; GFX9-NEXT: s_mov_b32 s11, s17
+; GFX9-NEXT: s_mov_b32 s12, s14
+; GFX9-NEXT: s_mov_b32 s13, s15
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 4, i32 0, i32 0>
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 2, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v3p0__5_5_0_0() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_5_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_5_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+define void @s_shuffle_v4p0_v3p0__5_3_1_1() {
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_3_1_1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s4
+; GFX9-NEXT: s_mov_b32 s11, s5
+; GFX9-NEXT: s_mov_b32 s12, s14
+; GFX9-NEXT: s_mov_b32 s13, s15
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 0>
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 3, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v3p0__5_5_u_0() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_5_u_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_5_u_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_u_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+define void @s_shuffle_v4p0_v3p0__5_4_1_1() {
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_4_1_1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s6
+; GFX9-NEXT: s_mov_b32 s11, s7
+; GFX9-NEXT: s_mov_b32 s12, s14
+; GFX9-NEXT: s_mov_b32 s13, s15
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 0>
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 4, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v3p0__5_5_1_0() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_5_1_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_5_1_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_1_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+define void @s_shuffle_v4p0_v3p0__5_5_1_1() {
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_5_1_1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s12, s14
+; GFX9-NEXT: s_mov_b32 s13, s15
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 0>
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v3p0__5_5_2_0() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_5_2_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:21]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s20
-; GFX900-NEXT: s_mov_b32 s13, s21
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_5_2_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:21]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s20
-; GFX90A-NEXT: s_mov_b32 s13, s21
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_2_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+define void @s_shuffle_v4p0_v3p0__5_5_u_1() {
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_5_u_1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 0>
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v3p0__5_5_3_0() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_5_3_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_5_3_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_3_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:17]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+define void @s_shuffle_v4p0_v3p0__5_5_0_1() {
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_5_0_1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 0>
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v3p0__5_5_4_0() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_5_4_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_5_4_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_4_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:17]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+define void @s_shuffle_v4p0_v3p0__5_5_2_1() {
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_5_2_1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s12, s16
+; GFX9-NEXT: s_mov_b32 s13, s17
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 0>
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v3p0__u_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__u_1_1_1:
+define void @s_shuffle_v4p0_v3p0__5_5_3_1() {
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_5_3_1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
+; GFX9-NEXT: ; def s[12:17]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s12, s4
+; GFX9-NEXT: s_mov_b32 s13, s5
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:15]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 poison, i32 1, i32 1, i32 1>
+ %vec1 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v3p0__0_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__0_1_1_1:
+define void @s_shuffle_v4p0_v3p0__5_5_4_1() {
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_5_4_1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
+; GFX9-NEXT: ; def s[12:17]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s12, s6
+; GFX9-NEXT: s_mov_b32 s13, s7
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:15]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
+ %vec1 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v3p0__1_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__1_1_1_1:
+define void @s_shuffle_v4p0_v3p0__u_2_2_2() {
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__u_2_2_2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; def s[8:13]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: s_mov_b32 s14, s12
+; GFX9-NEXT: s_mov_b32 s15, s13
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:15]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 poison, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v3p0__2_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__2_1_1_1:
+define void @s_shuffle_v4p0_v3p0__0_2_2_2() {
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__0_2_2_2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; def s[8:13]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: s_mov_b32 s14, s12
+; GFX9-NEXT: s_mov_b32 s15, s13
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:15]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 2, i32 1, i32 1, i32 1>
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 0, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v3p0__3_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__3_1_1_1:
+define void @s_shuffle_v4p0_v3p0__1_2_2_2() {
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__1_2_2_2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; def s[8:13]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: s_mov_b32 s14, s12
+; GFX9-NEXT: s_mov_b32 s15, s13
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:15]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 3, i32 1, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v3p0__4_1_1_1() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__4_1_1_1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s12, s10
-; GFX900-NEXT: s_mov_b32 s13, s11
-; GFX900-NEXT: s_mov_b32 s14, s10
-; GFX900-NEXT: s_mov_b32 s15, s11
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__4_1_1_1:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s12, s10
-; GFX90A-NEXT: s_mov_b32 s13, s11
-; GFX90A-NEXT: s_mov_b32 s14, s10
-; GFX90A-NEXT: s_mov_b32 s15, s11
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__4_1_1_1:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 4, i32 1, i32 1, i32 1>
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 1, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v3p0__5_1_1_1() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_1_1_1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s12, s10
-; GFX900-NEXT: s_mov_b32 s13, s11
-; GFX900-NEXT: s_mov_b32 s14, s10
-; GFX900-NEXT: s_mov_b32 s15, s11
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_1_1_1:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s12, s10
-; GFX90A-NEXT: s_mov_b32 s13, s11
-; GFX90A-NEXT: s_mov_b32 s14, s10
-; GFX90A-NEXT: s_mov_b32 s15, s11
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_1_1_1:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 1, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v3p0__5_u_1_1() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_u_1_1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_u_1_1:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_u_1_1:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:9]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 poison, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v3p0__5_0_1_1() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_0_1_1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_0_1_1:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_0_1_1:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:9]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 0, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v3p0__5_2_1_1() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_2_1_1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_2_1_1:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_2_1_1:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 2, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v3p0__5_3_1_1() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_3_1_1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_3_1_1:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_3_1_1:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:9]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 3, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v3p0__5_4_1_1() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_4_1_1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_4_1_1:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_4_1_1:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 4, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v3p0__5_5_1_1() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_5_1_1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_5_1_1:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_1_1:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v3p0__5_5_u_1() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_5_u_1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_5_u_1:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_u_1:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v3p0__5_5_0_1() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_5_0_1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_5_0_1:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_0_1:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v3p0__5_5_2_1() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_5_2_1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_5_2_1:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_2_1:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v3p0__5_5_3_1() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_5_3_1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_5_3_1:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_3_1:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:17]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v3p0__5_5_4_1() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_5_4_1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_5_4_1:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_4_1:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:17]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v3p0__u_2_2_2() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__u_2_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 poison, i32 2, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v3p0__0_2_2_2() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__0_2_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 0, i32 2, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v3p0__1_2_2_2() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__1_2_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 1, i32 2, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v3p0__2_2_2_2() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__2_2_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v3p0__3_2_2_2() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__3_2_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 3, i32 2, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v3p0__4_2_2_2() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__4_2_2_2:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s14, s12
-; GFX900-NEXT: s_mov_b32 s15, s13
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__4_2_2_2:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s14, s12
-; GFX90A-NEXT: s_mov_b32 s15, s13
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__4_2_2_2:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 4, i32 2, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v3p0__5_2_2_2() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_2_2_2:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s14, s12
-; GFX900-NEXT: s_mov_b32 s15, s13
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_2_2_2:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s14, s12
-; GFX90A-NEXT: s_mov_b32 s15, s13
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_2_2_2:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 2, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v3p0__5_u_2_2() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_u_2_2:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s14, s12
-; GFX900-NEXT: s_mov_b32 s15, s13
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_u_2_2:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s14, s12
-; GFX90A-NEXT: s_mov_b32 s15, s13
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_u_2_2:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 poison, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v3p0__5_0_2_2() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_0_2_2:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_0_2_2:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_0_2_2:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 0, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v3p0__5_1_2_2() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_1_2_2:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s14, s12
-; GFX900-NEXT: s_mov_b32 s15, s13
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_1_2_2:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s14, s12
-; GFX90A-NEXT: s_mov_b32 s15, s13
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_1_2_2:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 1, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v3p0__5_3_2_2() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_3_2_2:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s14, s12
-; GFX900-NEXT: s_mov_b32 s15, s13
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_3_2_2:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s14, s12
-; GFX90A-NEXT: s_mov_b32 s15, s13
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_3_2_2:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 3, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v3p0__5_4_2_2() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_4_2_2:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_4_2_2:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_4_2_2:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 4, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v3p0__5_5_2_2() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_5_2_2:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:21]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s20
-; GFX900-NEXT: s_mov_b32 s9, s21
-; GFX900-NEXT: s_mov_b32 s10, s20
-; GFX900-NEXT: s_mov_b32 s11, s21
-; GFX900-NEXT: s_mov_b32 s14, s12
-; GFX900-NEXT: s_mov_b32 s15, s13
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_5_2_2:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:21]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s20
-; GFX90A-NEXT: s_mov_b32 s9, s21
-; GFX90A-NEXT: s_mov_b32 s10, s20
-; GFX90A-NEXT: s_mov_b32 s11, s21
-; GFX90A-NEXT: s_mov_b32 s14, s12
-; GFX90A-NEXT: s_mov_b32 s15, s13
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_2_2:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v3p0__5_5_u_2() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_5_u_2:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:21]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s20
-; GFX900-NEXT: s_mov_b32 s9, s21
-; GFX900-NEXT: s_mov_b32 s10, s20
-; GFX900-NEXT: s_mov_b32 s11, s21
-; GFX900-NEXT: s_mov_b32 s14, s12
-; GFX900-NEXT: s_mov_b32 s15, s13
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_5_u_2:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:21]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s20
-; GFX90A-NEXT: s_mov_b32 s9, s21
-; GFX90A-NEXT: s_mov_b32 s10, s20
-; GFX90A-NEXT: s_mov_b32 s11, s21
-; GFX90A-NEXT: s_mov_b32 s14, s12
-; GFX90A-NEXT: s_mov_b32 s15, s13
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_u_2:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 2>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v3p0__5_5_0_2() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_5_0_2:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[20:25]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s24
-; GFX900-NEXT: s_mov_b32 s9, s25
-; GFX900-NEXT: s_mov_b32 s10, s24
-; GFX900-NEXT: s_mov_b32 s11, s25
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_5_0_2:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[20:25]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s24
-; GFX90A-NEXT: s_mov_b32 s9, s25
-; GFX90A-NEXT: s_mov_b32 s10, s24
-; GFX90A-NEXT: s_mov_b32 s11, s25
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_0_2:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 2>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v3p0__5_5_1_2() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_5_1_2:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_5_1_2:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_1_2:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 2>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v3p0__5_5_3_2() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_5_3_2:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[20:25]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s24
-; GFX900-NEXT: s_mov_b32 s9, s25
-; GFX900-NEXT: s_mov_b32 s10, s24
-; GFX900-NEXT: s_mov_b32 s11, s25
-; GFX900-NEXT: s_mov_b32 s12, s20
-; GFX900-NEXT: s_mov_b32 s13, s21
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_5_3_2:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[20:25]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s24
-; GFX90A-NEXT: s_mov_b32 s9, s25
-; GFX90A-NEXT: s_mov_b32 s10, s24
-; GFX90A-NEXT: s_mov_b32 s11, s25
-; GFX90A-NEXT: s_mov_b32 s12, s20
-; GFX90A-NEXT: s_mov_b32 s13, s21
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_3_2:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:17]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 2>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v3p0__5_5_4_2() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_5_4_2:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[20:25]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s24
-; GFX900-NEXT: s_mov_b32 s9, s25
-; GFX900-NEXT: s_mov_b32 s10, s24
-; GFX900-NEXT: s_mov_b32 s11, s25
-; GFX900-NEXT: s_mov_b32 s12, s22
-; GFX900-NEXT: s_mov_b32 s13, s23
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_5_4_2:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[20:25]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s24
-; GFX90A-NEXT: s_mov_b32 s9, s25
-; GFX90A-NEXT: s_mov_b32 s10, s24
-; GFX90A-NEXT: s_mov_b32 s11, s25
-; GFX90A-NEXT: s_mov_b32 s12, s22
-; GFX90A-NEXT: s_mov_b32 s13, s23
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_4_2:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:17]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 2>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v3p0__u_3_3_3() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__u_3_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 poison, i32 3, i32 3, i32 3>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v3p0__0_3_3_3() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__0_3_3_3:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__0_3_3_3:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__0_3_3_3:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 0, i32 3, i32 3, i32 3>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v3p0__1_3_3_3() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__1_3_3_3:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__1_3_3_3:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__1_3_3_3:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 1, i32 3, i32 3, i32 3>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v3p0__2_3_3_3() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__2_3_3_3:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__2_3_3_3:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__2_3_3_3:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 2, i32 3, i32 3, i32 3>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v3p0__3_3_3_3() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__3_3_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v3p0__4_3_3_3() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__4_3_3_3:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__4_3_3_3:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__4_3_3_3:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 4, i32 3, i32 3, i32 3>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v3p0__5_3_3_3() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_3_3_3:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_3_3_3:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_3_3_3:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 3, i32 3, i32 3>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v3p0__5_u_3_3() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_u_3_3:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_u_3_3:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_u_3_3:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 poison, i32 3, i32 3>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v3p0__5_0_3_3() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_0_3_3:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:21]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s20
-; GFX900-NEXT: s_mov_b32 s9, s21
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_0_3_3:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:21]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s20
-; GFX90A-NEXT: s_mov_b32 s9, s21
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_0_3_3:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:9]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 0, i32 3, i32 3>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v3p0__5_1_3_3() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_1_3_3:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_1_3_3:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_1_3_3:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 1, i32 3, i32 3>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v3p0__5_2_3_3() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_2_3_3:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_2_3_3:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_2_3_3:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[16:21]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s20
-; GFX942-NEXT: s_mov_b32 s9, s21
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s16
-; GFX942-NEXT: s_mov_b32 s13, s17
-; GFX942-NEXT: s_mov_b32 s14, s16
-; GFX942-NEXT: s_mov_b32 s15, s17
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+define void @s_shuffle_v4p0_v3p0__2_2_2_2() {
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__2_2_2_2:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s12, s8
+; GFX9-NEXT: s_mov_b32 s13, s9
+; GFX9-NEXT: s_mov_b32 s14, s8
+; GFX9-NEXT: s_mov_b32 s15, s9
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 2, i32 3, i32 3>
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v3p0__5_4_3_3() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_4_3_3:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s6
-; GFX900-NEXT: s_mov_b32 s11, s7
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_4_3_3:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s6
-; GFX90A-NEXT: s_mov_b32 s11, s7
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_4_3_3:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+define void @s_shuffle_v4p0_v3p0__3_2_2_2() {
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__3_2_2_2:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: s_mov_b32 s14, s12
+; GFX9-NEXT: s_mov_b32 s15, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 4, i32 3, i32 3>
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 3, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v3p0__5_5_3_3() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_5_3_3:
+define void @s_shuffle_v4p0_v3p0__4_2_2_2() {
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__4_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:21]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s20
-; GFX900-NEXT: s_mov_b32 s9, s21
-; GFX900-NEXT: s_mov_b32 s10, s20
-; GFX900-NEXT: s_mov_b32 s11, s21
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_5_3_3:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:21]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s20
-; GFX90A-NEXT: s_mov_b32 s9, s21
-; GFX90A-NEXT: s_mov_b32 s10, s20
-; GFX90A-NEXT: s_mov_b32 s11, s21
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_3_3:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 3>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v3p0__5_5_u_3() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_5_u_3:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: s_mov_b32 s14, s12
; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
@@ -12767,16 +9531,19 @@ define void @s_shuffle_v4p0_v3p0__5_5_u_3() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_5_u_3:
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__4_2_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
+; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: s_mov_b32 s14, s12
; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
@@ -12784,1637 +9551,1983 @@ define void @s_shuffle_v4p0_v3p0__5_5_u_3() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_u_3:
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__4_2_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b32 s8, s2
+; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b32 s10, s12
+; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b32 s14, s12
+; GFX942-NEXT: s_mov_b32 s15, s13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 3>
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 4, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v3p0__5_5_0_3() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_5_0_3:
+define void @s_shuffle_v4p0_v3p0__5_2_2_2() {
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:21]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s20
-; GFX900-NEXT: s_mov_b32 s9, s21
-; GFX900-NEXT: s_mov_b32 s10, s20
-; GFX900-NEXT: s_mov_b32 s11, s21
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_5_0_3:
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_2_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:21]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s20
-; GFX90A-NEXT: s_mov_b32 s9, s21
-; GFX90A-NEXT: s_mov_b32 s10, s20
-; GFX90A-NEXT: s_mov_b32 s11, s21
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_0_3:
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_2_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s10, s12
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[16:21]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s20
-; GFX942-NEXT: s_mov_b32 s9, s21
-; GFX942-NEXT: s_mov_b32 s10, s20
-; GFX942-NEXT: s_mov_b32 s11, s21
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s16
-; GFX942-NEXT: s_mov_b32 s15, s17
+; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b32 s14, s12
+; GFX942-NEXT: s_mov_b32 s15, s13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 3>
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v3p0__5_5_1_3() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_5_1_3:
+define void @s_shuffle_v4p0_v3p0__5_u_2_2() {
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_u_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:21]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s20
-; GFX900-NEXT: s_mov_b32 s9, s21
-; GFX900-NEXT: s_mov_b32 s10, s20
-; GFX900-NEXT: s_mov_b32 s11, s21
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_5_1_3:
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_u_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:21]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s20
-; GFX90A-NEXT: s_mov_b32 s9, s21
-; GFX90A-NEXT: s_mov_b32 s10, s20
-; GFX90A-NEXT: s_mov_b32 s11, s21
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_1_3:
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_u_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s14, s12
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[16:21]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s20
-; GFX942-NEXT: s_mov_b32 s9, s21
-; GFX942-NEXT: s_mov_b32 s10, s20
-; GFX942-NEXT: s_mov_b32 s11, s21
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s16
-; GFX942-NEXT: s_mov_b32 s15, s17
+; GFX942-NEXT: s_mov_b32 s15, s13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 3>
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 poison, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v3p0__5_5_2_3() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_5_2_3:
+define void @s_shuffle_v4p0_v3p0__5_0_2_2() {
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_0_2_2:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: s_mov_b32 s12, s16
+; GFX9-NEXT: s_mov_b32 s13, s17
+; GFX9-NEXT: s_mov_b32 s14, s16
+; GFX9-NEXT: s_mov_b32 s15, s17
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 0, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v3p0__5_1_2_2() {
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_1_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:21]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s20
-; GFX900-NEXT: s_mov_b32 s9, s21
-; GFX900-NEXT: s_mov_b32 s10, s20
-; GFX900-NEXT: s_mov_b32 s11, s21
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_5_2_3:
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_1_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:21]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s20
-; GFX90A-NEXT: s_mov_b32 s9, s21
-; GFX90A-NEXT: s_mov_b32 s10, s20
-; GFX90A-NEXT: s_mov_b32 s11, s21
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_2_3:
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_1_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s14, s12
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b32 s15, s13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 3>
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 1, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v3p0__5_5_4_3() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_5_4_3:
+define void @s_shuffle_v4p0_v3p0__5_3_2_2() {
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_3_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:21]
+; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s20
-; GFX900-NEXT: s_mov_b32 s9, s21
-; GFX900-NEXT: s_mov_b32 s10, s20
-; GFX900-NEXT: s_mov_b32 s11, s21
-; GFX900-NEXT: s_mov_b32 s12, s18
-; GFX900-NEXT: s_mov_b32 s13, s19
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_5_4_3:
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_3_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:21]
+; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s20
-; GFX90A-NEXT: s_mov_b32 s9, s21
-; GFX90A-NEXT: s_mov_b32 s10, s20
-; GFX90A-NEXT: s_mov_b32 s11, s21
-; GFX90A-NEXT: s_mov_b32 s12, s18
-; GFX90A-NEXT: s_mov_b32 s13, s19
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_4_3:
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_3_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s14, s12
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
; GFX942-NEXT: s_mov_b32 s10, s4
; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b32 s15, s13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 3>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v3p0__u_4_4_4() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__u_4_4_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 poison, i32 4, i32 4, i32 4>
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 3, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v3p0__0_4_4_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__0_4_4_4:
+define void @s_shuffle_v4p0_v3p0__5_4_2_2() {
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_4_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s10, s6
+; GFX900-NEXT: s_mov_b32 s11, s7
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__0_4_4_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_4_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s10, s6
+; GFX90A-NEXT: s_mov_b32 s11, s7
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__0_4_4_4:
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_4_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s14, s12
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b32 s10, s6
+; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b32 s15, s13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 4, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v3p0__1_4_4_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__1_4_4_4:
+define void @s_shuffle_v4p0_v3p0__5_5_2_2() {
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_5_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s12, s10
-; GFX900-NEXT: s_mov_b32 s13, s11
-; GFX900-NEXT: s_mov_b32 s14, s10
-; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__1_4_4_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_5_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s12, s10
-; GFX90A-NEXT: s_mov_b32 s13, s11
-; GFX90A-NEXT: s_mov_b32 s14, s10
-; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__1_4_4_4:
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s14, s12
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
+; GFX942-NEXT: s_mov_b32 s10, s8
+; GFX942-NEXT: s_mov_b32 s11, s9
+; GFX942-NEXT: s_mov_b32 s15, s13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 1, i32 4, i32 4, i32 4>
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v3p0__2_4_4_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__2_4_4_4:
+define void @s_shuffle_v4p0_v3p0__5_5_u_2() {
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_5_u_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
+; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s12, s10
-; GFX900-NEXT: s_mov_b32 s13, s11
-; GFX900-NEXT: s_mov_b32 s14, s10
-; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__2_4_4_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_5_u_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
+; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s12, s10
-; GFX90A-NEXT: s_mov_b32 s13, s11
-; GFX90A-NEXT: s_mov_b32 s14, s10
-; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__2_4_4_4:
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_u_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s14, s12
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
+; GFX942-NEXT: s_mov_b32 s10, s8
+; GFX942-NEXT: s_mov_b32 s11, s9
+; GFX942-NEXT: s_mov_b32 s15, s13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 2, i32 4, i32 4, i32 4>
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v3p0__3_4_4_4() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__3_4_4_4:
+define void @s_shuffle_v4p0_v3p0__5_5_0_2() {
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_5_0_2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
+; GFX9-NEXT: ; def s[12:17]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s14, s16
+; GFX9-NEXT: s_mov_b32 s15, s17
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:15]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 3, i32 4, i32 4, i32 4>
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v3p0__4_4_4_4() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__4_4_4_4:
+define void @s_shuffle_v4p0_v3p0__5_5_1_2() {
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_5_1_2:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s12, s14
+; GFX9-NEXT: s_mov_b32 s13, s15
+; GFX9-NEXT: s_mov_b32 s14, s16
+; GFX9-NEXT: s_mov_b32 s15, s17
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v3p0__5_5_3_2() {
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_5_3_2:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s12, s4
+; GFX9-NEXT: s_mov_b32 s13, s5
+; GFX9-NEXT: s_mov_b32 s14, s16
+; GFX9-NEXT: s_mov_b32 s15, s17
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v3p0__5_5_4_2() {
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_5_4_2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
+; GFX9-NEXT: ; def s[12:17]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s12, s6
+; GFX9-NEXT: s_mov_b32 s13, s7
+; GFX9-NEXT: s_mov_b32 s14, s16
+; GFX9-NEXT: s_mov_b32 s15, s17
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:15]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 4, i32 4, i32 4, i32 4>
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v3p0__5_4_4_4() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_4_4_4:
+define void @s_shuffle_v4p0_v3p0__u_3_3_3() {
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__u_3_3_3:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:15]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 4, i32 4, i32 4>
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 poison, i32 3, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v3p0__5_u_4_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_u_4_4:
+define void @s_shuffle_v4p0_v3p0__0_3_3_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__0_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_u_4_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__0_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_u_4_4:
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__0_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 poison, i32 4, i32 4>
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 0, i32 3, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v3p0__5_0_4_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_0_4_4:
+define void @s_shuffle_v4p0_v3p0__1_3_3_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__1_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_0_4_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__1_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_0_4_4:
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__1_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:9]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b32 s8, s2
+; GFX942-NEXT: s_mov_b32 s9, s3
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 0, i32 4, i32 4>
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 1, i32 3, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v3p0__5_1_4_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_1_4_4:
+define void @s_shuffle_v4p0_v3p0__2_3_3_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__2_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_1_4_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__2_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_1_4_4:
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__2_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 2, i32 3, i32 3, i32 3>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v3p0__3_3_3_3() {
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__3_3_3_3:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v3p0__4_3_3_3() {
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__4_3_3_3:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s14
+; GFX9-NEXT: s_mov_b32 s9, s15
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: s_mov_b32 s14, s12
+; GFX9-NEXT: s_mov_b32 s15, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 1, i32 4, i32 4>
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 4, i32 3, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v3p0__5_2_4_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_2_4_4:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_2_4_4:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_2_4_4:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:17]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+define void @s_shuffle_v4p0_v3p0__5_3_3_3() {
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_3_3_3:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s4
+; GFX9-NEXT: s_mov_b32 s11, s5
+; GFX9-NEXT: s_mov_b32 s12, s4
+; GFX9-NEXT: s_mov_b32 s13, s5
+; GFX9-NEXT: s_mov_b32 s14, s4
+; GFX9-NEXT: s_mov_b32 s15, s5
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 2, i32 4, i32 4>
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 3, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v3p0__5_3_4_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_3_4_4:
+define void @s_shuffle_v4p0_v3p0__5_u_3_3() {
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_u_3_3:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s12, s4
+; GFX9-NEXT: s_mov_b32 s13, s5
+; GFX9-NEXT: s_mov_b32 s14, s4
+; GFX9-NEXT: s_mov_b32 s15, s5
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 poison, i32 3, i32 3>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v3p0__5_0_3_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_0_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:17]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s14, s4
+; GFX900-NEXT: s_mov_b32 s15, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_3_4_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_0_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:17]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s14, s4
+; GFX90A-NEXT: s_mov_b32 s15, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_3_4_4:
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_0_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:9]
+; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b32 s12, s4
+; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b32 s14, s4
+; GFX942-NEXT: s_mov_b32 s15, s5
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 3, i32 4, i32 4>
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 0, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v3p0__5_5_4_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_5_4_4:
+define void @s_shuffle_v4p0_v3p0__5_1_3_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_1_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
+; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s14, s4
+; GFX900-NEXT: s_mov_b32 s15, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_5_4_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_1_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
+; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s14, s4
+; GFX90A-NEXT: s_mov_b32 s15, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_4_4:
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_1_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:9]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s12, s4
+; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b32 s14, s4
+; GFX942-NEXT: s_mov_b32 s15, s5
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 4>
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 1, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v3p0__5_5_u_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_5_u_4:
+define void @s_shuffle_v4p0_v3p0__5_2_3_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_2_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
+; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s14, s4
+; GFX900-NEXT: s_mov_b32 s15, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_5_u_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_2_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
+; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s14, s4
+; GFX90A-NEXT: s_mov_b32 s15, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_u_4:
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_2_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b32 s10, s12
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:9]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b32 s12, s4
+; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b32 s14, s4
+; GFX942-NEXT: s_mov_b32 s15, s5
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 4>
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 2, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v3p0__5_5_0_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_5_0_4:
+define void @s_shuffle_v4p0_v3p0__5_4_3_3() {
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_4_3_3:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s6
+; GFX9-NEXT: s_mov_b32 s11, s7
+; GFX9-NEXT: s_mov_b32 s12, s4
+; GFX9-NEXT: s_mov_b32 s13, s5
+; GFX9-NEXT: s_mov_b32 s14, s4
+; GFX9-NEXT: s_mov_b32 s15, s5
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 4, i32 3, i32 3>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v3p0__5_5_3_3() {
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_5_3_3:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s12, s4
+; GFX9-NEXT: s_mov_b32 s13, s5
+; GFX9-NEXT: s_mov_b32 s14, s4
+; GFX9-NEXT: s_mov_b32 s15, s5
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 3>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v3p0__5_5_u_3() {
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_5_u_3:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s14, s4
+; GFX9-NEXT: s_mov_b32 s15, s5
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 3>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v3p0__5_5_0_3() {
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_5_0_3:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s14, s4
+; GFX9-NEXT: s_mov_b32 s15, s5
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 3>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v3p0__5_5_1_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_5_1_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s14, s4
+; GFX900-NEXT: s_mov_b32 s15, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_5_0_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_5_1_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s14, s4
+; GFX90A-NEXT: s_mov_b32 s15, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_0_4:
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_1_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:17]
+; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s12, s2
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b32 s10, s8
+; GFX942-NEXT: s_mov_b32 s11, s9
+; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b32 s14, s4
+; GFX942-NEXT: s_mov_b32 s15, s5
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 4>
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v3p0__5_5_1_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_5_1_4:
+define void @s_shuffle_v4p0_v3p0__5_5_2_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_5_2_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: s_mov_b32 s14, s4
+; GFX900-NEXT: s_mov_b32 s15, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_5_1_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_5_2_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: s_mov_b32 s14, s4
+; GFX90A-NEXT: s_mov_b32 s15, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_1_4:
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_2_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:17]
+; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b32 s10, s8
+; GFX942-NEXT: s_mov_b32 s11, s9
+; GFX942-NEXT: s_mov_b32 s14, s4
+; GFX942-NEXT: s_mov_b32 s15, s5
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 4>
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v3p0__5_5_2_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_5_2_4:
+define void @s_shuffle_v4p0_v3p0__5_5_4_3() {
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_5_4_3:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s12, s6
+; GFX9-NEXT: s_mov_b32 s13, s7
+; GFX9-NEXT: s_mov_b32 s14, s4
+; GFX9-NEXT: s_mov_b32 s15, s5
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 3>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v3p0__u_4_4_4() {
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__u_4_4_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: s_mov_b32 s14, s10
+; GFX9-NEXT: s_mov_b32 s15, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 poison, i32 4, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v3p0__0_4_4_4() {
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__0_4_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:21]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s20
-; GFX900-NEXT: s_mov_b32 s9, s21
-; GFX900-NEXT: s_mov_b32 s10, s20
-; GFX900-NEXT: s_mov_b32 s11, s21
-; GFX900-NEXT: s_mov_b32 s14, s18
-; GFX900-NEXT: s_mov_b32 s15, s19
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_5_2_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__0_4_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:21]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s20
-; GFX90A-NEXT: s_mov_b32 s9, s21
-; GFX90A-NEXT: s_mov_b32 s10, s20
-; GFX90A-NEXT: s_mov_b32 s11, s21
-; GFX90A-NEXT: s_mov_b32 s14, s18
-; GFX90A-NEXT: s_mov_b32 s15, s19
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_2_4:
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__0_4_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[12:17]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b32 s10, s14
+; GFX942-NEXT: s_mov_b32 s11, s15
+; GFX942-NEXT: s_mov_b32 s12, s14
+; GFX942-NEXT: s_mov_b32 s13, s15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 4>
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v3p0__5_5_3_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_5_3_4:
+define void @s_shuffle_v4p0_v3p0__1_4_4_4() {
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__1_4_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_5_3_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__1_4_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_3_4:
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__1_4_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b32 s8, s2
+; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b32 s12, s10
+; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b32 s14, s10
+; GFX942-NEXT: s_mov_b32 s15, s11
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 4>
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 1, i32 4, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v3p0__u_5_5_5() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__u_5_5_5:
+define void @s_shuffle_v4p0_v3p0__2_4_4_4() {
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__2_4_4_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s14
+; GFX9-NEXT: s_mov_b32 s11, s15
+; GFX9-NEXT: s_mov_b32 s12, s14
+; GFX9-NEXT: s_mov_b32 s13, s15
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 2, i32 4, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v3p0__3_4_4_4() {
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__3_4_4_4:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; def s[8:13]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: s_mov_b32 s14, s10
+; GFX9-NEXT: s_mov_b32 s15, s11
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:15]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 poison, i32 5, i32 5, i32 5>
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 3, i32 4, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v3p0__0_5_5_5() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__0_5_5_5:
+define void @s_shuffle_v4p0_v3p0__4_4_4_4() {
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__4_4_4_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: s_mov_b32 s14, s10
+; GFX9-NEXT: s_mov_b32 s15, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 4, i32 4, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v3p0__5_4_4_4() {
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_4_4_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s6
+; GFX9-NEXT: s_mov_b32 s11, s7
+; GFX9-NEXT: s_mov_b32 s12, s6
+; GFX9-NEXT: s_mov_b32 s13, s7
+; GFX9-NEXT: s_mov_b32 s14, s6
+; GFX9-NEXT: s_mov_b32 s15, s7
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 4, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v3p0__5_u_4_4() {
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_u_4_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s12, s6
+; GFX9-NEXT: s_mov_b32 s13, s7
+; GFX9-NEXT: s_mov_b32 s14, s6
+; GFX9-NEXT: s_mov_b32 s15, s7
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 poison, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v3p0__5_0_4_4() {
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_0_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s14, s6
+; GFX900-NEXT: s_mov_b32 s15, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__0_5_5_5:
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_0_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s14, s6
+; GFX90A-NEXT: s_mov_b32 s15, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__0_5_5_5:
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_0_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s10, s0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b32 s12, s6
+; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b32 s14, s6
+; GFX942-NEXT: s_mov_b32 s15, s7
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 0, i32 5, i32 5, i32 5>
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 0, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v3p0__1_5_5_5() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__1_5_5_5:
+define void @s_shuffle_v4p0_v3p0__5_1_4_4() {
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_1_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s14, s12
-; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s14, s6
+; GFX900-NEXT: s_mov_b32 s15, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__1_5_5_5:
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_1_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s14, s12
-; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s14, s6
+; GFX90A-NEXT: s_mov_b32 s15, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__1_5_5_5:
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_1_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b32 s12, s6
+; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b32 s14, s6
+; GFX942-NEXT: s_mov_b32 s15, s7
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 1, i32 5, i32 5, i32 5>
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 1, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v3p0__2_5_5_5() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__2_5_5_5:
+define void @s_shuffle_v4p0_v3p0__5_2_4_4() {
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_2_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
+; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
; GFX900-NEXT: s_mov_b32 s10, s12
; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s14, s12
-; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s14, s6
+; GFX900-NEXT: s_mov_b32 s15, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__2_5_5_5:
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_2_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
+; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
; GFX90A-NEXT: s_mov_b32 s10, s12
; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s14, s12
-; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s14, s6
+; GFX90A-NEXT: s_mov_b32 s15, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__2_5_5_5:
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_2_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s10, s12
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s12
; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b32 s12, s6
+; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b32 s14, s6
+; GFX942-NEXT: s_mov_b32 s15, s7
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 2, i32 5, i32 5, i32 5>
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 2, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v3p0__3_5_5_5() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__3_5_5_5:
+define void @s_shuffle_v4p0_v3p0__5_3_4_4() {
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_3_4_4:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
+; GFX9-NEXT: ; def s[4:9]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
+; GFX9-NEXT: s_mov_b32 s10, s4
+; GFX9-NEXT: s_mov_b32 s11, s5
+; GFX9-NEXT: s_mov_b32 s12, s6
+; GFX9-NEXT: s_mov_b32 s13, s7
+; GFX9-NEXT: s_mov_b32 s14, s6
+; GFX9-NEXT: s_mov_b32 s15, s7
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:15]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 3, i32 5, i32 5, i32 5>
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 3, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v3p0__4_5_5_5() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__4_5_5_5:
+define void @s_shuffle_v4p0_v3p0__5_5_4_4() {
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_5_4_4:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
+; GFX9-NEXT: ; def s[4:9]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s12, s6
+; GFX9-NEXT: s_mov_b32 s13, s7
+; GFX9-NEXT: s_mov_b32 s14, s6
+; GFX9-NEXT: s_mov_b32 s15, s7
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:15]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 4, i32 5, i32 5, i32 5>
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v3p0__5_u_5_5() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_u_5_5:
+define void @s_shuffle_v4p0_v3p0__5_5_u_4() {
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_5_u_4:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
+; GFX9-NEXT: ; def s[4:9]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s14, s6
+; GFX9-NEXT: s_mov_b32 s15, s7
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:15]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 poison, i32 5, i32 5>
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v3p0__5_0_5_5() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_0_5_5:
+define void @s_shuffle_v4p0_v3p0__5_5_0_4() {
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_5_0_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s14, s6
+; GFX9-NEXT: s_mov_b32 s15, s7
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v3p0__5_5_1_4() {
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_5_1_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s14, s12
-; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s14, s6
+; GFX900-NEXT: s_mov_b32 s15, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_0_5_5:
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_5_1_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:17]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s14, s6
+; GFX90A-NEXT: s_mov_b32 s15, s7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_1_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:9]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s10, s8
+; GFX942-NEXT: s_mov_b32 s11, s9
+; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b32 s14, s6
+; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v3p0__5_5_2_4() {
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_5_2_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: s_mov_b32 s14, s6
+; GFX900-NEXT: s_mov_b32 s15, s7
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_5_2_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s14, s12
-; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: s_mov_b32 s14, s6
+; GFX90A-NEXT: s_mov_b32 s15, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_0_5_5:
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_2_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b32 s10, s8
+; GFX942-NEXT: s_mov_b32 s11, s9
+; GFX942-NEXT: s_mov_b32 s14, s6
+; GFX942-NEXT: s_mov_b32 s15, s7
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 0, i32 5, i32 5>
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v3p0__5_1_5_5() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_1_5_5:
+define void @s_shuffle_v4p0_v3p0__5_5_3_4() {
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_5_3_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s12, s4
+; GFX9-NEXT: s_mov_b32 s13, s5
+; GFX9-NEXT: s_mov_b32 s14, s6
+; GFX9-NEXT: s_mov_b32 s15, s7
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v3p0__u_5_5_5() {
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__u_5_5_5:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: s_mov_b32 s14, s12
+; GFX9-NEXT: s_mov_b32 s15, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 poison, i32 5, i32 5, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v3p0__0_5_5_5() {
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__0_5_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -14423,8 +11536,8 @@ define void @s_shuffle_v4p0_v3p0__5_1_5_5() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
; GFX900-NEXT: s_mov_b32 s12, s16
; GFX900-NEXT: s_mov_b32 s13, s17
; GFX900-NEXT: s_mov_b32 s14, s16
@@ -14434,7 +11547,7 @@ define void @s_shuffle_v4p0_v3p0__5_1_5_5() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_1_5_5:
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__0_5_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -14443,8 +11556,8 @@ define void @s_shuffle_v4p0_v3p0__5_1_5_5() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
; GFX90A-NEXT: s_mov_b32 s12, s16
; GFX90A-NEXT: s_mov_b32 s13, s17
; GFX90A-NEXT: s_mov_b32 s14, s16
@@ -14454,7 +11567,7 @@ define void @s_shuffle_v4p0_v3p0__5_1_5_5() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_1_5_5:
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__0_5_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -14463,8 +11576,8 @@ define void @s_shuffle_v4p0_v3p0__5_1_5_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b32 s10, s4
+; GFX942-NEXT: s_mov_b32 s11, s5
; GFX942-NEXT: s_mov_b32 s12, s4
; GFX942-NEXT: s_mov_b32 s13, s5
; GFX942-NEXT: s_mov_b32 s14, s4
@@ -14475,25 +11588,25 @@ define void @s_shuffle_v4p0_v3p0__5_1_5_5() {
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 1, i32 5, i32 5>
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 0, i32 5, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v3p0__5_2_5_5() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_2_5_5:
+define void @s_shuffle_v4p0_v3p0__1_5_5_5() {
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__1_5_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: s_mov_b32 s14, s12
; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
@@ -14501,19 +11614,19 @@ define void @s_shuffle_v4p0_v3p0__5_2_5_5() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_2_5_5:
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__1_5_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: s_mov_b32 s14, s12
; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
@@ -14521,7 +11634,7 @@ define void @s_shuffle_v4p0_v3p0__5_2_5_5() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_2_5_5:
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__1_5_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -14530,10 +11643,10 @@ define void @s_shuffle_v4p0_v3p0__5_2_5_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b32 s8, s2
+; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b32 s10, s12
+; GFX942-NEXT: s_mov_b32 s11, s13
; GFX942-NEXT: s_mov_b32 s14, s12
; GFX942-NEXT: s_mov_b32 s15, s13
; GFX942-NEXT: ;;#ASMSTART
@@ -14542,22 +11655,23 @@ define void @s_shuffle_v4p0_v3p0__5_2_5_5() {
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 2, i32 5, i32 5>
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 1, i32 5, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v3p0__5_3_5_5() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_3_5_5:
+define void @s_shuffle_v4p0_v3p0__2_5_5_5() {
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__2_5_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
; GFX900-NEXT: s_mov_b32 s12, s16
; GFX900-NEXT: s_mov_b32 s13, s17
; GFX900-NEXT: s_mov_b32 s14, s16
@@ -14567,16 +11681,17 @@ define void @s_shuffle_v4p0_v3p0__5_3_5_5() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_3_5_5:
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__2_5_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
; GFX90A-NEXT: s_mov_b32 s12, s16
; GFX90A-NEXT: s_mov_b32 s13, s17
; GFX90A-NEXT: s_mov_b32 s14, s16
@@ -14586,16 +11701,18 @@ define void @s_shuffle_v4p0_v3p0__5_3_5_5() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_3_5_5:
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__2_5_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:9]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b32 s10, s4
+; GFX942-NEXT: s_mov_b32 s11, s5
; GFX942-NEXT: s_mov_b32 s12, s4
; GFX942-NEXT: s_mov_b32 s13, s5
; GFX942-NEXT: s_mov_b32 s14, s4
@@ -14606,20 +11723,20 @@ define void @s_shuffle_v4p0_v3p0__5_3_5_5() {
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 3, i32 5, i32 5>
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 2, i32 5, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v3p0__5_4_5_5() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_4_5_5:
+define void @s_shuffle_v4p0_v3p0__3_5_5_5() {
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__3_5_5_5:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; def s[8:13]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
; GFX9-NEXT: s_mov_b32 s14, s12
; GFX9-NEXT: s_mov_b32 s15, s13
; GFX9-NEXT: ;;#ASMSTART
@@ -14628,399 +11745,518 @@ define void @s_shuffle_v4p0_v3p0__5_4_5_5() {
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 4, i32 5, i32 5>
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 3, i32 5, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v3p0__5_5_u_5() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_5_u_5:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:13]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s14, s12
-; GFX900-NEXT: s_mov_b32 s15, s13
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_5_u_5:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:13]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s14, s12
-; GFX90A-NEXT: s_mov_b32 s15, s13
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_u_5:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+define void @s_shuffle_v4p0_v3p0__4_5_5_5() {
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__4_5_5_5:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: s_mov_b32 s14, s12
+; GFX9-NEXT: s_mov_b32 s15, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 5>
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 4, i32 5, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v3p0__5_5_0_5() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_5_0_5:
+define void @s_shuffle_v4p0_v3p0__5_u_5_5() {
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_u_5_5:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s12, s8
+; GFX9-NEXT: s_mov_b32 s13, s9
+; GFX9-NEXT: s_mov_b32 s14, s8
+; GFX9-NEXT: s_mov_b32 s15, s9
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 poison, i32 5, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v3p0__5_0_5_5() {
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_0_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s8
+; GFX900-NEXT: s_mov_b32 s13, s9
+; GFX900-NEXT: s_mov_b32 s14, s8
+; GFX900-NEXT: s_mov_b32 s15, s9
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_5_0_5:
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_0_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s8
+; GFX90A-NEXT: s_mov_b32 s13, s9
+; GFX90A-NEXT: s_mov_b32 s14, s8
+; GFX90A-NEXT: s_mov_b32 s15, s9
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_0_5:
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_0_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:17]
+; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s10, s0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s16
-; GFX942-NEXT: s_mov_b32 s15, s17
+; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b32 s12, s8
+; GFX942-NEXT: s_mov_b32 s13, s9
+; GFX942-NEXT: s_mov_b32 s14, s8
+; GFX942-NEXT: s_mov_b32 s15, s9
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 5>
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 0, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v3p0__5_5_1_5() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_5_1_5:
+define void @s_shuffle_v4p0_v3p0__5_1_5_5() {
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_1_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s12, s8
+; GFX900-NEXT: s_mov_b32 s13, s9
+; GFX900-NEXT: s_mov_b32 s14, s8
+; GFX900-NEXT: s_mov_b32 s15, s9
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_5_1_5:
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_1_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s12, s8
+; GFX90A-NEXT: s_mov_b32 s13, s9
+; GFX90A-NEXT: s_mov_b32 s14, s8
+; GFX90A-NEXT: s_mov_b32 s15, s9
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_1_5:
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_1_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:17]
+; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s16
-; GFX942-NEXT: s_mov_b32 s15, s17
+; GFX942-NEXT: s_mov_b32 s12, s8
+; GFX942-NEXT: s_mov_b32 s13, s9
+; GFX942-NEXT: s_mov_b32 s14, s8
+; GFX942-NEXT: s_mov_b32 s15, s9
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 5>
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 1, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v3p0__5_5_2_5() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_5_2_5:
+define void @s_shuffle_v4p0_v3p0__5_2_5_5() {
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_2_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:21]
+; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s20
-; GFX900-NEXT: s_mov_b32 s9, s21
-; GFX900-NEXT: s_mov_b32 s10, s20
-; GFX900-NEXT: s_mov_b32 s11, s21
-; GFX900-NEXT: s_mov_b32 s14, s20
-; GFX900-NEXT: s_mov_b32 s15, s21
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s8
+; GFX900-NEXT: s_mov_b32 s13, s9
+; GFX900-NEXT: s_mov_b32 s14, s8
+; GFX900-NEXT: s_mov_b32 s15, s9
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_5_2_5:
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_2_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:21]
+; GFX90A-NEXT: ; def s[4:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s20
-; GFX90A-NEXT: s_mov_b32 s9, s21
-; GFX90A-NEXT: s_mov_b32 s10, s20
-; GFX90A-NEXT: s_mov_b32 s11, s21
-; GFX90A-NEXT: s_mov_b32 s14, s20
-; GFX90A-NEXT: s_mov_b32 s15, s21
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s8
+; GFX90A-NEXT: s_mov_b32 s13, s9
+; GFX90A-NEXT: s_mov_b32 s14, s8
+; GFX90A-NEXT: s_mov_b32 s15, s9
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_2_5:
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_2_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s10, s12
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b32 s12, s8
+; GFX942-NEXT: s_mov_b32 s13, s9
+; GFX942-NEXT: s_mov_b32 s14, s8
+; GFX942-NEXT: s_mov_b32 s15, s9
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 5>
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 2, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v3p0__5_5_3_5() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_5_3_5:
+define void @s_shuffle_v4p0_v3p0__5_3_5_5() {
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_3_5_5:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s4
+; GFX9-NEXT: s_mov_b32 s11, s5
+; GFX9-NEXT: s_mov_b32 s12, s8
+; GFX9-NEXT: s_mov_b32 s13, s9
+; GFX9-NEXT: s_mov_b32 s14, s8
+; GFX9-NEXT: s_mov_b32 s15, s9
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 3, i32 5, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v3p0__5_4_5_5() {
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_4_5_5:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s6
+; GFX9-NEXT: s_mov_b32 s11, s7
+; GFX9-NEXT: s_mov_b32 s12, s8
+; GFX9-NEXT: s_mov_b32 s13, s9
+; GFX9-NEXT: s_mov_b32 s14, s8
+; GFX9-NEXT: s_mov_b32 s15, s9
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 4, i32 5, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v3p0__5_5_u_5() {
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_5_u_5:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s14, s8
+; GFX9-NEXT: s_mov_b32 s15, s9
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v3p0__5_5_0_5() {
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_5_0_5:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:17]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s14, s8
+; GFX9-NEXT: s_mov_b32 s15, s9
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v3p0__5_5_1_5() {
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_5_1_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:17]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s14, s8
+; GFX900-NEXT: s_mov_b32 s15, s9
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_5_3_5:
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_5_1_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:17]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s14, s8
+; GFX90A-NEXT: s_mov_b32 s15, s9
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_3_5:
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_1_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:9]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s10, s8
+; GFX942-NEXT: s_mov_b32 s11, s9
+; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b32 s14, s8
+; GFX942-NEXT: s_mov_b32 s15, s9
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 5>
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v3p0__5_5_4_5() {
-; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_5_4_5:
+define void @s_shuffle_v4p0_v3p0__5_5_2_5() {
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_5_2_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
+; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: s_mov_b32 s14, s8
+; GFX900-NEXT: s_mov_b32 s15, s9
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_5_4_5:
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_5_2_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
+; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: s_mov_b32 s14, s8
+; GFX90A-NEXT: s_mov_b32 s15, s9
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_4_5:
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_2_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:9]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s10, s8
+; GFX942-NEXT: s_mov_b32 s11, s9
+; GFX942-NEXT: s_mov_b32 s14, s8
+; GFX942-NEXT: s_mov_b32 s15, s9
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v3p0__5_5_3_5() {
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_5_3_5:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s12, s4
+; GFX9-NEXT: s_mov_b32 s13, s5
+; GFX9-NEXT: s_mov_b32 s14, s8
+; GFX9-NEXT: s_mov_b32 s15, s9
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v3p0__5_5_4_5() {
+; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_5_4_5:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:9]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s12, s6
+; GFX9-NEXT: s_mov_b32 s13, s7
+; GFX9-NEXT: s_mov_b32 s14, s8
+; GFX9-NEXT: s_mov_b32 s15, s9
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 5>
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v4p0.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v4p0.ll
index 257af574366a6..3e354c8006fd3 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v4p0.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v4p0.ll
@@ -139,39 +139,33 @@ define void @v_shuffle_v4p0_v4p0__3_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v4p0__3_u_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v4p0__3_u_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v4p0__3_u_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -275,39 +269,33 @@ define void @v_shuffle_v4p0_v4p0__7_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v4p0__7_u_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v4p0__7_u_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_u_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -321,55 +309,42 @@ define void @v_shuffle_v4p0_v4p0__7_0_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v4p0__7_0_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v8, v6
-; GFX900-NEXT: v_mov_b32_e32 v9, v7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, v0
-; GFX900-NEXT: v_mov_b32_e32 v11, v1
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v4p0__7_0_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:9]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v8
-; GFX90A-NEXT: v_mov_b32_e32 v3, v9
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_0_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:9]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v8
-; GFX942-NEXT: v_mov_b32_e32 v3, v9
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -383,49 +358,43 @@ define void @v_shuffle_v4p0_v4p0__7_1_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v4p0__7_1_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v10
-; GFX900-NEXT: v_mov_b32_e32 v1, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v4p0__7_1_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v10
-; GFX90A-NEXT: v_mov_b32_e32 v1, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_1_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v10
-; GFX942-NEXT: v_mov_b32_e32 v1, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -439,49 +408,43 @@ define void @v_shuffle_v4p0_v4p0__7_2_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v4p0__7_2_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, v12
-; GFX900-NEXT: v_mov_b32_e32 v3, v13
-; GFX900-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v4p0__7_2_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, v12
-; GFX90A-NEXT: v_mov_b32_e32 v3, v13
-; GFX90A-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_2_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v12
-; GFX942-NEXT: v_mov_b32_e32 v3, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -495,49 +458,43 @@ define void @v_shuffle_v4p0_v4p0__7_3_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v4p0__7_3_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v14
-; GFX900-NEXT: v_mov_b32_e32 v5, v15
-; GFX900-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v4p0__7_3_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v14
-; GFX90A-NEXT: v_mov_b32_e32 v5, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_3_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v14
-; GFX942-NEXT: v_mov_b32_e32 v5, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -551,45 +508,40 @@ define void @v_shuffle_v4p0_v4p0__7_4_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v4p0__7_4_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v0
+; GFX900-NEXT: v_mov_b32_e32 v9, v1
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v4p0__7_4_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v0
+; GFX90A-NEXT: v_mov_b32_e32 v9, v1
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_4_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v0
+; GFX942-NEXT: v_mov_b32_e32 v9, v1
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -603,39 +555,40 @@ define void @v_shuffle_v4p0_v4p0__7_5_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v4p0__7_5_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v2
+; GFX900-NEXT: v_mov_b32_e32 v9, v3
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v4p0__7_5_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v2
+; GFX90A-NEXT: v_mov_b32_e32 v9, v3
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_5_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v2
+; GFX942-NEXT: v_mov_b32_e32 v9, v3
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -649,39 +602,40 @@ define void @v_shuffle_v4p0_v4p0__7_6_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v4p0__7_6_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v4
+; GFX900-NEXT: v_mov_b32_e32 v9, v5
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v4p0__7_6_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v4
+; GFX90A-NEXT: v_mov_b32_e32 v9, v5
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_6_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v4
+; GFX942-NEXT: v_mov_b32_e32 v9, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -695,39 +649,40 @@ define void @v_shuffle_v4p0_v4p0__7_7_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v4p0__7_7_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v4p0__7_7_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -741,51 +696,54 @@ define void @v_shuffle_v4p0_v4p0__7_7_0_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v4p0__7_7_0_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:9]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v8
-; GFX900-NEXT: v_mov_b32_e32 v7, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v4p0__7_7_0_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:9]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v8
-; GFX90A-NEXT: v_mov_b32_e32 v7, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_0_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:9]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v6, v8
-; GFX942-NEXT: v_mov_b32_e32 v7, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -799,51 +757,54 @@ define void @v_shuffle_v4p0_v4p0__7_7_1_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v4p0__7_7_1_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v8, v10
-; GFX900-NEXT: v_mov_b32_e32 v9, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v4p0__7_7_1_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v10
-; GFX90A-NEXT: v_mov_b32_e32 v9, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_1_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v8, v10
-; GFX942-NEXT: v_mov_b32_e32 v9, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -857,51 +818,54 @@ define void @v_shuffle_v4p0_v4p0__7_7_2_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v4p0__7_7_2_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
-; GFX900-NEXT: v_mov_b32_e32 v10, v12
-; GFX900-NEXT: v_mov_b32_e32 v11, v13
-; GFX900-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v4p0__7_7_2_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
-; GFX90A-NEXT: v_mov_b32_e32 v10, v12
-; GFX90A-NEXT: v_mov_b32_e32 v11, v13
-; GFX90A-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_2_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v10, v12
-; GFX942-NEXT: v_mov_b32_e32 v11, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1]
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -915,57 +879,52 @@ define void @v_shuffle_v4p0_v4p0__7_7_3_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v4p0__7_7_3_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, v6
-; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v10, v[8:11], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v4p0__7_7_3_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v18, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v16, v14
+; GFX90A-NEXT: v_mov_b32_e32 v17, v15
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: v_mov_b32_e32 v12, v14
-; GFX90A-NEXT: v_mov_b32_e32 v13, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v18, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v18, v[14:17], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_3_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v18, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: v_mov_b32_e32 v12, v14
-; GFX942-NEXT: v_mov_b32_e32 v13, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v18, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v16, v14
+; GFX942-NEXT: v_mov_b32_e32 v17, v15
+; GFX942-NEXT: global_store_dwordx4 v18, v[14:17], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -979,42 +938,42 @@ define void @v_shuffle_v4p0_v4p0__7_7_4_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v4p0__7_7_4_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v4p0__7_7_4_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_4_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -1028,45 +987,42 @@ define void @v_shuffle_v4p0_v4p0__7_7_5_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v4p0__7_7_5_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v4p0__7_7_5_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_5_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -1080,45 +1036,42 @@ define void @v_shuffle_v4p0_v4p0__7_7_6_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v4p0__7_7_6_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v4p0__7_7_6_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_6_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -1132,48 +1085,45 @@ define void @v_shuffle_v4p0_v4p0__7_7_7_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v4p0__7_7_7_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v4p0__7_7_7_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_7_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -1187,64 +1137,54 @@ define void @v_shuffle_v4p0_v4p0__7_7_7_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v4p0__7_7_7_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, v0
-; GFX900-NEXT: v_mov_b32_e32 v11, v1
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
; GFX900-NEXT: v_mov_b32_e32 v8, v6
; GFX900-NEXT: v_mov_b32_e32 v9, v7
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v4p0__7_7_7_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:9]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v8
-; GFX90A-NEXT: v_mov_b32_e32 v3, v9
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: v_mov_b32_e32 v6, v8
-; GFX90A-NEXT: v_mov_b32_e32 v7, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_7_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:9]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v8
-; GFX942-NEXT: v_mov_b32_e32 v3, v9
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: v_mov_b32_e32 v6, v8
-; GFX942-NEXT: v_mov_b32_e32 v7, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -1258,58 +1198,55 @@ define void @v_shuffle_v4p0_v4p0__7_7_7_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v4p0__7_7_7_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v10
-; GFX900-NEXT: v_mov_b32_e32 v1, v11
-; GFX900-NEXT: v_mov_b32_e32 v8, v10
-; GFX900-NEXT: v_mov_b32_e32 v9, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v4p0__7_7_7_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v10
-; GFX90A-NEXT: v_mov_b32_e32 v1, v11
-; GFX90A-NEXT: v_mov_b32_e32 v8, v10
-; GFX90A-NEXT: v_mov_b32_e32 v9, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_7_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v10
-; GFX942-NEXT: v_mov_b32_e32 v1, v11
-; GFX942-NEXT: v_mov_b32_e32 v8, v10
-; GFX942-NEXT: v_mov_b32_e32 v9, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -1323,58 +1260,55 @@ define void @v_shuffle_v4p0_v4p0__7_7_7_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v4p0__7_7_7_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v12
-; GFX900-NEXT: v_mov_b32_e32 v3, v13
-; GFX900-NEXT: v_mov_b32_e32 v10, v12
-; GFX900-NEXT: v_mov_b32_e32 v11, v13
-; GFX900-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v4p0__7_7_7_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v12
-; GFX90A-NEXT: v_mov_b32_e32 v3, v13
-; GFX90A-NEXT: v_mov_b32_e32 v10, v12
-; GFX90A-NEXT: v_mov_b32_e32 v11, v13
-; GFX90A-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_7_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v12
-; GFX942-NEXT: v_mov_b32_e32 v3, v13
-; GFX942-NEXT: v_mov_b32_e32 v10, v12
-; GFX942-NEXT: v_mov_b32_e32 v11, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -1388,57 +1322,55 @@ define void @v_shuffle_v4p0_v4p0__7_7_7_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v4p0__7_7_7_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v14
-; GFX900-NEXT: v_mov_b32_e32 v5, v15
-; GFX900-NEXT: v_mov_b32_e32 v12, v14
-; GFX900-NEXT: v_mov_b32_e32 v13, v15
-; GFX900-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v4p0__7_7_7_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v14
-; GFX90A-NEXT: v_mov_b32_e32 v5, v15
-; GFX90A-NEXT: v_mov_b32_e32 v12, v14
-; GFX90A-NEXT: v_mov_b32_e32 v13, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_7_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v14
-; GFX942-NEXT: v_mov_b32_e32 v5, v15
-; GFX942-NEXT: v_mov_b32_e32 v12, v14
-; GFX942-NEXT: v_mov_b32_e32 v13, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -1452,57 +1384,52 @@ define void @v_shuffle_v4p0_v4p0__7_7_7_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v4p0__7_7_7_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v8, v0
+; GFX900-NEXT: v_mov_b32_e32 v9, v1
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v4p0__7_7_7_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v8, v0
+; GFX90A-NEXT: v_mov_b32_e32 v9, v1
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_7_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v0
+; GFX942-NEXT: v_mov_b32_e32 v9, v1
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -1516,48 +1443,52 @@ define void @v_shuffle_v4p0_v4p0__7_7_7_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v4p0__7_7_7_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
-; GFX900-NEXT: s_waitcnt vmcnt(0)
-; GFX900-NEXT: s_setpc_b64 s[30:31]
+; GFX900-NEXT: v_mov_b32_e32 v8, v2
+; GFX900-NEXT: v_mov_b32_e32 v9, v3
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v4p0__7_7_7_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v2
+; GFX90A-NEXT: v_mov_b32_e32 v9, v3
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_7_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v2
+; GFX942-NEXT: v_mov_b32_e32 v9, v3
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -1571,51 +1502,52 @@ define void @v_shuffle_v4p0_v4p0__7_7_7_6(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v4p0__7_7_7_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v8, v4
+; GFX900-NEXT: v_mov_b32_e32 v9, v5
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v4p0__7_7_7_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: v_mov_b32_e32 v8, v4
+; GFX90A-NEXT: v_mov_b32_e32 v9, v5
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_7_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v4
+; GFX942-NEXT: v_mov_b32_e32 v9, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -1629,42 +1561,43 @@ define void @v_shuffle_v4p0_v4p0__7_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v4p0__7_7_7_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v4p0__7_7_7_7:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_7_7:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -1679,13 +1612,13 @@ define void @v_shuffle_v4p0_v4p0__u_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v0, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1693,13 +1626,13 @@ define void @v_shuffle_v4p0_v4p0__u_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v0, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1707,13 +1640,13 @@ define void @v_shuffle_v4p0_v4p0__u_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v0, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v0, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -1775,15 +1708,15 @@ define void @v_shuffle_v4p0_v4p0__1_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: v_mov_b32_e32 v6, v0
-; GFX900-NEXT: v_mov_b32_e32 v7, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
+; GFX900-NEXT: v_mov_b32_e32 v0, v2
+; GFX900-NEXT: v_mov_b32_e32 v1, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: v_mov_b32_e32 v7, v3
+; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1791,15 +1724,15 @@ define void @v_shuffle_v4p0_v4p0__1_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: v_mov_b32_e32 v7, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v2
+; GFX90A-NEXT: v_mov_b32_e32 v1, v3
+; GFX90A-NEXT: v_mov_b32_e32 v6, v2
+; GFX90A-NEXT: v_mov_b32_e32 v7, v3
+; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1807,15 +1740,15 @@ define void @v_shuffle_v4p0_v4p0__1_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v2
+; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: v_mov_b32_e32 v6, v2
+; GFX942-NEXT: v_mov_b32_e32 v7, v3
+; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -1885,15 +1818,13 @@ define void @v_shuffle_v4p0_v4p0__3_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v0
; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v0
+; GFX900-NEXT: v_mov_b32_e32 v9, v1
+; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1903,15 +1834,13 @@ define void @v_shuffle_v4p0_v4p0__3_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v0
; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v0
+; GFX90A-NEXT: v_mov_b32_e32 v9, v1
+; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1921,15 +1850,13 @@ define void @v_shuffle_v4p0_v4p0__3_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v0
; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v0
+; GFX942-NEXT: v_mov_b32_e32 v9, v1
+; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -1943,13 +1870,13 @@ define void @v_shuffle_v4p0_v4p0__4_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v0, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1957,13 +1884,13 @@ define void @v_shuffle_v4p0_v4p0__4_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v0, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1971,13 +1898,13 @@ define void @v_shuffle_v4p0_v4p0__4_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v0, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v0, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -2121,20 +2048,16 @@ define void @v_shuffle_v4p0_v4p0__7_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, v6
-; GFX900-NEXT: v_mov_b32_e32 v9, v7
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: v_mov_b32_e32 v10, v0
-; GFX900-NEXT: v_mov_b32_e32 v11, v1
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v10, v8
+; GFX900-NEXT: v_mov_b32_e32 v11, v9
+; GFX900-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2142,20 +2065,16 @@ define void @v_shuffle_v4p0_v4p0__7_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:9]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v8
-; GFX90A-NEXT: v_mov_b32_e32 v3, v9
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v10, v8
+; GFX90A-NEXT: v_mov_b32_e32 v11, v9
+; GFX90A-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2163,21 +2082,16 @@ define void @v_shuffle_v4p0_v4p0__7_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:9]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: v_mov_b32_e32 v2, v8
-; GFX942-NEXT: v_mov_b32_e32 v3, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v10, v8
+; GFX942-NEXT: v_mov_b32_e32 v11, v9
+; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -2192,19 +2106,16 @@ define void @v_shuffle_v4p0_v4p0__7_u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:9]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v8
-; GFX900-NEXT: v_mov_b32_e32 v1, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v10, v8
+; GFX900-NEXT: v_mov_b32_e32 v11, v9
+; GFX900-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2212,19 +2123,16 @@ define void @v_shuffle_v4p0_v4p0__7_u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:9]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v8
-; GFX90A-NEXT: v_mov_b32_e32 v1, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v10, v8
+; GFX90A-NEXT: v_mov_b32_e32 v11, v9
+; GFX90A-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2232,20 +2140,16 @@ define void @v_shuffle_v4p0_v4p0__7_u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:9]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v0, v8
-; GFX942-NEXT: v_mov_b32_e32 v1, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v10, v8
+; GFX942-NEXT: v_mov_b32_e32 v11, v9
+; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -2260,64 +2164,58 @@ define void @v_shuffle_v4p0_v4p0__7_1_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: v_mov_b32_e32 v6, v0
-; GFX900-NEXT: v_mov_b32_e32 v7, v1
-; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
+; GFX900-NEXT: v_mov_b32_e32 v0, v2
+; GFX900-NEXT: v_mov_b32_e32 v1, v3
+; GFX900-NEXT: v_mov_b32_e32 v8, v4
+; GFX900-NEXT: v_mov_b32_e32 v9, v5
+; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
; GFX900-NEXT: s_nop 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v10
-; GFX900-NEXT: v_mov_b32_e32 v1, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v4p0__7_1_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v0, v2
+; GFX90A-NEXT: v_mov_b32_e32 v1, v3
+; GFX90A-NEXT: v_mov_b32_e32 v14, v4
+; GFX90A-NEXT: v_mov_b32_e32 v15, v5
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: v_mov_b32_e32 v7, v1
-; GFX90A-NEXT: v_mov_b32_e32 v0, v10
-; GFX90A-NEXT: v_mov_b32_e32 v1, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_1_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v2
+; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: v_mov_b32_e32 v14, v4
+; GFX942-NEXT: v_mov_b32_e32 v15, v5
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
-; GFX942-NEXT: v_mov_b32_e32 v0, v10
-; GFX942-NEXT: v_mov_b32_e32 v1, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -2332,19 +2230,19 @@ define void @v_shuffle_v4p0_v4p0__7_2_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, v8
+; GFX900-NEXT: v_mov_b32_e32 v11, v9
+; GFX900-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v14, v[0:3], s[16:17] offset:16
; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v12
-; GFX900-NEXT: v_mov_b32_e32 v3, v13
-; GFX900-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v12
+; GFX900-NEXT: v_mov_b32_e32 v9, v13
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2352,19 +2250,19 @@ define void @v_shuffle_v4p0_v4p0__7_2_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, v8
+; GFX90A-NEXT: v_mov_b32_e32 v11, v9
+; GFX90A-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v14, v[0:3], s[16:17] offset:16
; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v12
-; GFX90A-NEXT: v_mov_b32_e32 v3, v13
-; GFX90A-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v12
+; GFX90A-NEXT: v_mov_b32_e32 v9, v13
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2372,19 +2270,19 @@ define void @v_shuffle_v4p0_v4p0__7_2_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, v8
+; GFX942-NEXT: v_mov_b32_e32 v11, v9
+; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v14, v[0:3], s[0:1] offset:16
; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v12
-; GFX942-NEXT: v_mov_b32_e32 v3, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v12
+; GFX942-NEXT: v_mov_b32_e32 v9, v13
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -2399,18 +2297,19 @@ define void @v_shuffle_v4p0_v4p0__7_3_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, v8
+; GFX900-NEXT: v_mov_b32_e32 v11, v9
+; GFX900-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: v_mov_b32_e32 v4, v14
-; GFX900-NEXT: v_mov_b32_e32 v5, v15
-; GFX900-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v14
+; GFX900-NEXT: v_mov_b32_e32 v9, v15
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2418,18 +2317,19 @@ define void @v_shuffle_v4p0_v4p0__7_3_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v16, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, v8
+; GFX90A-NEXT: v_mov_b32_e32 v11, v9
+; GFX90A-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: v_mov_b32_e32 v4, v14
-; GFX90A-NEXT: v_mov_b32_e32 v5, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v14
+; GFX90A-NEXT: v_mov_b32_e32 v9, v15
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2437,18 +2337,19 @@ define void @v_shuffle_v4p0_v4p0__7_3_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, v8
+; GFX942-NEXT: v_mov_b32_e32 v11, v9
+; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: v_mov_b32_e32 v4, v14
-; GFX942-NEXT: v_mov_b32_e32 v5, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v14
+; GFX942-NEXT: v_mov_b32_e32 v9, v15
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -2463,21 +2364,19 @@ define void @v_shuffle_v4p0_v4p0__7_4_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, v8
+; GFX900-NEXT: v_mov_b32_e32 v11, v9
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v10
-; GFX900-NEXT: v_mov_b32_e32 v1, v11
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v0
+; GFX900-NEXT: v_mov_b32_e32 v9, v1
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2485,21 +2384,19 @@ define void @v_shuffle_v4p0_v4p0__7_4_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, v8
+; GFX90A-NEXT: v_mov_b32_e32 v11, v9
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v10
-; GFX90A-NEXT: v_mov_b32_e32 v1, v11
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v0
+; GFX90A-NEXT: v_mov_b32_e32 v9, v1
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2507,21 +2404,19 @@ define void @v_shuffle_v4p0_v4p0__7_4_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, v8
+; GFX942-NEXT: v_mov_b32_e32 v11, v9
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:16
; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v0, v10
-; GFX942-NEXT: v_mov_b32_e32 v1, v11
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v0
+; GFX942-NEXT: v_mov_b32_e32 v9, v1
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -2536,19 +2431,19 @@ define void @v_shuffle_v4p0_v4p0__7_5_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, v8
+; GFX900-NEXT: v_mov_b32_e32 v11, v9
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:9]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v8
-; GFX900-NEXT: v_mov_b32_e32 v3, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v2
+; GFX900-NEXT: v_mov_b32_e32 v9, v3
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2556,19 +2451,19 @@ define void @v_shuffle_v4p0_v4p0__7_5_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, v8
+; GFX90A-NEXT: v_mov_b32_e32 v11, v9
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:9]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v8
-; GFX90A-NEXT: v_mov_b32_e32 v3, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v2
+; GFX90A-NEXT: v_mov_b32_e32 v9, v3
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2576,20 +2471,19 @@ define void @v_shuffle_v4p0_v4p0__7_5_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, v8
+; GFX942-NEXT: v_mov_b32_e32 v11, v9
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:9]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:16
; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v8
-; GFX942-NEXT: v_mov_b32_e32 v3, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v2
+; GFX942-NEXT: v_mov_b32_e32 v9, v3
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -2604,18 +2498,19 @@ define void @v_shuffle_v4p0_v4p0__7_6_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, v8
+; GFX900-NEXT: v_mov_b32_e32 v11, v9
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:9]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: v_mov_b32_e32 v4, v8
-; GFX900-NEXT: v_mov_b32_e32 v5, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v4
+; GFX900-NEXT: v_mov_b32_e32 v9, v5
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2623,18 +2518,19 @@ define void @v_shuffle_v4p0_v4p0__7_6_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, v8
+; GFX90A-NEXT: v_mov_b32_e32 v11, v9
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:9]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: v_mov_b32_e32 v4, v8
-; GFX90A-NEXT: v_mov_b32_e32 v5, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v4
+; GFX90A-NEXT: v_mov_b32_e32 v9, v5
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2642,19 +2538,19 @@ define void @v_shuffle_v4p0_v4p0__7_6_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, v8
+; GFX942-NEXT: v_mov_b32_e32 v11, v9
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:9]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: v_mov_b32_e32 v4, v8
-; GFX942-NEXT: v_mov_b32_e32 v5, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v4
+; GFX942-NEXT: v_mov_b32_e32 v9, v5
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -2669,18 +2565,19 @@ define void @v_shuffle_v4p0_v4p0__7_7_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, v8
+; GFX900-NEXT: v_mov_b32_e32 v11, v9
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:9]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: v_mov_b32_e32 v6, v8
-; GFX900-NEXT: v_mov_b32_e32 v7, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2688,18 +2585,19 @@ define void @v_shuffle_v4p0_v4p0__7_7_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, v8
+; GFX90A-NEXT: v_mov_b32_e32 v11, v9
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:9]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: v_mov_b32_e32 v6, v8
-; GFX90A-NEXT: v_mov_b32_e32 v7, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2707,19 +2605,19 @@ define void @v_shuffle_v4p0_v4p0__7_7_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, v8
+; GFX942-NEXT: v_mov_b32_e32 v11, v9
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:9]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: v_mov_b32_e32 v6, v8
-; GFX942-NEXT: v_mov_b32_e32 v7, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -2733,19 +2631,18 @@ define void @v_shuffle_v4p0_v4p0__7_7_u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v4p0__7_7_u_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, v0
-; GFX900-NEXT: v_mov_b32_e32 v9, v1
+; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2753,18 +2650,16 @@ define void @v_shuffle_v4p0_v4p0__7_7_u_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:9]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: v_mov_b32_e32 v6, v8
-; GFX90A-NEXT: v_mov_b32_e32 v7, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v12, v10
+; GFX90A-NEXT: v_mov_b32_e32 v13, v11
+; GFX90A-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v0, v[10:13], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2772,19 +2667,16 @@ define void @v_shuffle_v4p0_v4p0__7_7_u_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:9]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: v_mov_b32_e32 v6, v8
-; GFX942-NEXT: v_mov_b32_e32 v7, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v0, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v12, v10
+; GFX942-NEXT: v_mov_b32_e32 v13, v11
+; GFX942-NEXT: global_store_dwordx4 v0, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -2799,18 +2691,18 @@ define void @v_shuffle_v4p0_v4p0__7_7_1_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: v_mov_b32_e32 v8, v10
-; GFX900-NEXT: v_mov_b32_e32 v9, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v12, v8
+; GFX900-NEXT: v_mov_b32_e32 v13, v9
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v16, v[10:13], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2818,18 +2710,18 @@ define void @v_shuffle_v4p0_v4p0__7_7_1_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: v_mov_b32_e32 v8, v10
-; GFX90A-NEXT: v_mov_b32_e32 v9, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v12, v8
+; GFX90A-NEXT: v_mov_b32_e32 v13, v9
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v16, v[10:13], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2837,19 +2729,18 @@ define void @v_shuffle_v4p0_v4p0__7_7_1_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: v_mov_b32_e32 v8, v10
-; GFX942-NEXT: v_mov_b32_e32 v9, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v12, v8
+; GFX942-NEXT: v_mov_b32_e32 v13, v9
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -2864,18 +2755,18 @@ define void @v_shuffle_v4p0_v4p0__7_7_2_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v0
-; GFX900-NEXT: v_mov_b32_e32 v7, v1
-; GFX900-NEXT: v_mov_b32_e32 v10, v12
-; GFX900-NEXT: v_mov_b32_e32 v11, v13
-; GFX900-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v14, v8
+; GFX900-NEXT: v_mov_b32_e32 v15, v9
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2883,18 +2774,18 @@ define void @v_shuffle_v4p0_v4p0__7_7_2_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: v_mov_b32_e32 v7, v1
-; GFX90A-NEXT: v_mov_b32_e32 v10, v12
-; GFX90A-NEXT: v_mov_b32_e32 v11, v13
-; GFX90A-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v14, v8
+; GFX90A-NEXT: v_mov_b32_e32 v15, v9
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2902,19 +2793,18 @@ define void @v_shuffle_v4p0_v4p0__7_7_2_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
-; GFX942-NEXT: v_mov_b32_e32 v10, v12
-; GFX942-NEXT: v_mov_b32_e32 v11, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v14, v8
+; GFX942-NEXT: v_mov_b32_e32 v15, v9
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -2928,21 +2818,20 @@ define void @v_shuffle_v4p0_v4p0__7_7_3_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v4p0__7_7_3_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, v6
-; GFX900-NEXT: v_mov_b32_e32 v9, v7
-; GFX900-NEXT: v_mov_b32_e32 v10, v0
-; GFX900-NEXT: v_mov_b32_e32 v11, v1
+; GFX900-NEXT: v_mov_b32_e32 v8, v0
+; GFX900-NEXT: v_mov_b32_e32 v9, v1
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2950,20 +2839,18 @@ define void @v_shuffle_v4p0_v4p0__7_7_3_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v18, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: v_mov_b32_e32 v12, v14
-; GFX90A-NEXT: v_mov_b32_e32 v13, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v0
+; GFX90A-NEXT: v_mov_b32_e32 v9, v1
+; GFX90A-NEXT: v_mov_b32_e32 v16, v14
+; GFX90A-NEXT: v_mov_b32_e32 v17, v15
+; GFX90A-NEXT: global_store_dwordx4 v18, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v18, v[14:17], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2971,20 +2858,18 @@ define void @v_shuffle_v4p0_v4p0__7_7_3_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v18, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: v_mov_b32_e32 v12, v14
-; GFX942-NEXT: v_mov_b32_e32 v13, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v16, v14
+; GFX942-NEXT: v_mov_b32_e32 v8, v0
+; GFX942-NEXT: v_mov_b32_e32 v9, v1
+; GFX942-NEXT: v_mov_b32_e32 v17, v15
+; GFX942-NEXT: global_store_dwordx4 v18, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v18, v[14:17], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -2999,18 +2884,18 @@ define void @v_shuffle_v4p0_v4p0__7_7_4_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:9]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: v_mov_b32_e32 v6, v8
-; GFX900-NEXT: v_mov_b32_e32 v7, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v8
+; GFX900-NEXT: v_mov_b32_e32 v3, v9
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3018,18 +2903,18 @@ define void @v_shuffle_v4p0_v4p0__7_7_4_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:9]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: v_mov_b32_e32 v6, v8
-; GFX90A-NEXT: v_mov_b32_e32 v7, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v8
+; GFX90A-NEXT: v_mov_b32_e32 v3, v9
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3037,19 +2922,18 @@ define void @v_shuffle_v4p0_v4p0__7_7_4_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:9]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: v_mov_b32_e32 v6, v8
-; GFX942-NEXT: v_mov_b32_e32 v7, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v8
+; GFX942-NEXT: v_mov_b32_e32 v3, v9
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -3064,19 +2948,18 @@ define void @v_shuffle_v4p0_v4p0__7_7_5_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:9]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v0
-; GFX900-NEXT: v_mov_b32_e32 v7, v1
-; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v8
-; GFX900-NEXT: v_mov_b32_e32 v7, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v8
+; GFX900-NEXT: v_mov_b32_e32 v5, v9
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v16, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3084,19 +2967,18 @@ define void @v_shuffle_v4p0_v4p0__7_7_5_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:9]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: v_mov_b32_e32 v7, v1
-; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v8
-; GFX90A-NEXT: v_mov_b32_e32 v7, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v8
+; GFX90A-NEXT: v_mov_b32_e32 v5, v9
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v16, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3104,20 +2986,18 @@ define void @v_shuffle_v4p0_v4p0__7_7_5_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:9]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
-; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v6, v8
-; GFX942-NEXT: v_mov_b32_e32 v7, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v8
+; GFX942-NEXT: v_mov_b32_e32 v5, v9
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -3132,20 +3012,18 @@ define void @v_shuffle_v4p0_v4p0__7_7_6_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, v0
-; GFX900-NEXT: v_mov_b32_e32 v11, v1
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v8, v4
-; GFX900-NEXT: v_mov_b32_e32 v9, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, v8
+; GFX900-NEXT: v_mov_b32_e32 v1, v9
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
+; GFX900-NEXT: v_mov_b32_e32 v12, v10
+; GFX900-NEXT: v_mov_b32_e32 v13, v11
+; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v4, v[10:13], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3153,20 +3031,18 @@ define void @v_shuffle_v4p0_v4p0__7_7_6_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:9]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: v_mov_b32_e32 v6, v8
-; GFX90A-NEXT: v_mov_b32_e32 v7, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v8
+; GFX90A-NEXT: v_mov_b32_e32 v1, v9
+; GFX90A-NEXT: v_mov_b32_e32 v12, v10
+; GFX90A-NEXT: v_mov_b32_e32 v13, v11
+; GFX90A-NEXT: global_store_dwordx4 v14, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3174,26 +3050,24 @@ define void @v_shuffle_v4p0_v4p0__7_7_6_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:9]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: v_mov_b32_e32 v6, v8
-; GFX942-NEXT: v_mov_b32_e32 v7, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
-; GFX942-NEXT: s_waitcnt vmcnt(0)
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=v"()
- %vec1 = call <4 x ptr> asm "; def $0", "=v"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 0>
+; GFX942-NEXT: v_mov_b32_e32 v0, v8
+; GFX942-NEXT: v_mov_b32_e32 v1, v9
+; GFX942-NEXT: v_mov_b32_e32 v12, v10
+; GFX942-NEXT: v_mov_b32_e32 v13, v11
+; GFX942-NEXT: global_store_dwordx4 v14, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1]
+; GFX942-NEXT: s_waitcnt vmcnt(0)
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=v"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=v"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 0>
store <4 x ptr> %shuf, ptr addrspace(1) %ptr, align 32
ret void
}
@@ -3403,13 +3277,13 @@ define void @v_shuffle_v4p0_v4p0__3_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: v_mov_b32_e32 v4, v2
; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v2
+; GFX900-NEXT: v_mov_b32_e32 v9, v3
+; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3419,13 +3293,13 @@ define void @v_shuffle_v4p0_v4p0__3_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: v_mov_b32_e32 v4, v2
; GFX90A-NEXT: v_mov_b32_e32 v5, v3
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v2
+; GFX90A-NEXT: v_mov_b32_e32 v9, v3
+; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3435,13 +3309,13 @@ define void @v_shuffle_v4p0_v4p0__3_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: v_mov_b32_e32 v4, v2
; GFX942-NEXT: v_mov_b32_e32 v5, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v2
+; GFX942-NEXT: v_mov_b32_e32 v9, v3
+; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -3633,18 +3507,16 @@ define void @v_shuffle_v4p0_v4p0__7_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v2
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: v_mov_b32_e32 v0, v10
-; GFX900-NEXT: v_mov_b32_e32 v1, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v10, v8
+; GFX900-NEXT: v_mov_b32_e32 v11, v9
+; GFX900-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3652,18 +3524,16 @@ define void @v_shuffle_v4p0_v4p0__7_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v3
-; GFX90A-NEXT: v_mov_b32_e32 v0, v10
-; GFX90A-NEXT: v_mov_b32_e32 v1, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v10, v8
+; GFX90A-NEXT: v_mov_b32_e32 v11, v9
+; GFX90A-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3671,19 +3541,16 @@ define void @v_shuffle_v4p0_v4p0__7_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, v10
-; GFX942-NEXT: v_mov_b32_e32 v1, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v10, v8
+; GFX942-NEXT: v_mov_b32_e32 v11, v9
+; GFX942-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -3698,18 +3565,16 @@ define void @v_shuffle_v4p0_v4p0__7_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v2
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: v_mov_b32_e32 v0, v10
-; GFX900-NEXT: v_mov_b32_e32 v1, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v10, v8
+; GFX900-NEXT: v_mov_b32_e32 v11, v9
+; GFX900-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3717,18 +3582,16 @@ define void @v_shuffle_v4p0_v4p0__7_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v3
-; GFX90A-NEXT: v_mov_b32_e32 v0, v10
-; GFX90A-NEXT: v_mov_b32_e32 v1, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v10, v8
+; GFX90A-NEXT: v_mov_b32_e32 v11, v9
+; GFX90A-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3736,19 +3599,16 @@ define void @v_shuffle_v4p0_v4p0__7_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, v10
-; GFX942-NEXT: v_mov_b32_e32 v1, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v10, v8
+; GFX942-NEXT: v_mov_b32_e32 v11, v9
+; GFX942-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -3763,20 +3623,16 @@ define void @v_shuffle_v4p0_v4p0__7_0_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, v6
-; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v12, v10
+; GFX900-NEXT: v_mov_b32_e32 v13, v11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v2
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: v_mov_b32_e32 v10, v0
-; GFX900-NEXT: v_mov_b32_e32 v11, v1
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v16, v[10:13], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3784,21 +3640,16 @@ define void @v_shuffle_v4p0_v4p0__7_0_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
+; GFX90A-NEXT: v_mov_b32_e32 v12, v10
+; GFX90A-NEXT: v_mov_b32_e32 v13, v11
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v3
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v10
-; GFX90A-NEXT: v_mov_b32_e32 v3, v11
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v16, v[10:13], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3806,22 +3657,16 @@ define void @v_shuffle_v4p0_v4p0__7_0_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, v10
+; GFX942-NEXT: v_mov_b32_e32 v13, v11
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v3
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v10
-; GFX942-NEXT: v_mov_b32_e32 v3, v11
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -3836,19 +3681,19 @@ define void @v_shuffle_v4p0_v4p0__7_2_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v10
+; GFX900-NEXT: v_mov_b32_e32 v9, v11
+; GFX900-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v14, v[0:3], s[16:17] offset:16
; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v12
-; GFX900-NEXT: v_mov_b32_e32 v3, v13
-; GFX900-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v12
+; GFX900-NEXT: v_mov_b32_e32 v9, v13
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3856,19 +3701,19 @@ define void @v_shuffle_v4p0_v4p0__7_2_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v10
+; GFX90A-NEXT: v_mov_b32_e32 v9, v11
+; GFX90A-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v14, v[0:3], s[16:17] offset:16
; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v12
-; GFX90A-NEXT: v_mov_b32_e32 v3, v13
-; GFX90A-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v12
+; GFX90A-NEXT: v_mov_b32_e32 v9, v13
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3876,19 +3721,19 @@ define void @v_shuffle_v4p0_v4p0__7_2_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v10
+; GFX942-NEXT: v_mov_b32_e32 v9, v11
+; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v14, v[0:3], s[0:1] offset:16
; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v12
-; GFX942-NEXT: v_mov_b32_e32 v3, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v12
+; GFX942-NEXT: v_mov_b32_e32 v9, v13
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -3903,19 +3748,19 @@ define void @v_shuffle_v4p0_v4p0__7_3_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v2
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, v8
+; GFX900-NEXT: v_mov_b32_e32 v11, v9
+; GFX900-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17] offset:16
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v16, v[2:5], s[16:17] offset:16
; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v14
-; GFX900-NEXT: v_mov_b32_e32 v5, v15
-; GFX900-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v12
+; GFX900-NEXT: v_mov_b32_e32 v9, v13
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3923,19 +3768,19 @@ define void @v_shuffle_v4p0_v4p0__7_3_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, v8
+; GFX90A-NEXT: v_mov_b32_e32 v11, v9
+; GFX90A-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17] offset:16
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v16, v[2:5], s[16:17] offset:16
; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v14
-; GFX90A-NEXT: v_mov_b32_e32 v5, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v12
+; GFX90A-NEXT: v_mov_b32_e32 v9, v13
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3943,19 +3788,19 @@ define void @v_shuffle_v4p0_v4p0__7_3_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, v8
+; GFX942-NEXT: v_mov_b32_e32 v11, v9
+; GFX942-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] offset:16
; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v4, v14
-; GFX942-NEXT: v_mov_b32_e32 v5, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v12
+; GFX942-NEXT: v_mov_b32_e32 v9, v13
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -3970,20 +3815,19 @@ define void @v_shuffle_v4p0_v4p0__7_4_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v14, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v2
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: v_mov_b32_e32 v10, v8
+; GFX900-NEXT: v_mov_b32_e32 v11, v9
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v0, v12
-; GFX900-NEXT: v_mov_b32_e32 v1, v13
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: global_store_dwordx4 v14, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v0
+; GFX900-NEXT: v_mov_b32_e32 v9, v1
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3991,20 +3835,19 @@ define void @v_shuffle_v4p0_v4p0__7_4_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v14, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: v_mov_b32_e32 v10, v8
+; GFX90A-NEXT: v_mov_b32_e32 v11, v9
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v0, v12
-; GFX90A-NEXT: v_mov_b32_e32 v1, v13
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: global_store_dwordx4 v14, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v0
+; GFX90A-NEXT: v_mov_b32_e32 v9, v1
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4012,20 +3855,19 @@ define void @v_shuffle_v4p0_v4p0__7_4_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v14, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: v_mov_b32_e32 v10, v8
+; GFX942-NEXT: v_mov_b32_e32 v11, v9
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v0, v12
-; GFX942-NEXT: v_mov_b32_e32 v1, v13
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: global_store_dwordx4 v14, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v0
+; GFX942-NEXT: v_mov_b32_e32 v9, v1
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -4040,19 +3882,19 @@ define void @v_shuffle_v4p0_v4p0__7_5_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, v8
+; GFX900-NEXT: v_mov_b32_e32 v11, v9
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v2
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17] offset:16
; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v10
-; GFX900-NEXT: v_mov_b32_e32 v5, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v2
+; GFX900-NEXT: v_mov_b32_e32 v9, v3
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4060,19 +3902,19 @@ define void @v_shuffle_v4p0_v4p0__7_5_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, v8
+; GFX90A-NEXT: v_mov_b32_e32 v11, v9
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v3
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17] offset:16
; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v10
-; GFX90A-NEXT: v_mov_b32_e32 v5, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v2
+; GFX90A-NEXT: v_mov_b32_e32 v9, v3
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4080,20 +3922,19 @@ define void @v_shuffle_v4p0_v4p0__7_5_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, v8
+; GFX942-NEXT: v_mov_b32_e32 v11, v9
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v3
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] offset:16
; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v4, v10
-; GFX942-NEXT: v_mov_b32_e32 v5, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v2
+; GFX942-NEXT: v_mov_b32_e32 v9, v3
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -4108,18 +3949,19 @@ define void @v_shuffle_v4p0_v4p0__7_6_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, v8
+; GFX900-NEXT: v_mov_b32_e32 v11, v9
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v2
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: v_mov_b32_e32 v6, v10
-; GFX900-NEXT: v_mov_b32_e32 v7, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v4
+; GFX900-NEXT: v_mov_b32_e32 v9, v5
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4127,18 +3969,19 @@ define void @v_shuffle_v4p0_v4p0__7_6_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, v8
+; GFX90A-NEXT: v_mov_b32_e32 v11, v9
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v3
-; GFX90A-NEXT: v_mov_b32_e32 v6, v10
-; GFX90A-NEXT: v_mov_b32_e32 v7, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v4
+; GFX90A-NEXT: v_mov_b32_e32 v9, v5
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4146,19 +3989,19 @@ define void @v_shuffle_v4p0_v4p0__7_6_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, v8
+; GFX942-NEXT: v_mov_b32_e32 v11, v9
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v3
-; GFX942-NEXT: v_mov_b32_e32 v6, v10
-; GFX942-NEXT: v_mov_b32_e32 v7, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v4
+; GFX942-NEXT: v_mov_b32_e32 v9, v5
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -4173,18 +4016,19 @@ define void @v_shuffle_v4p0_v4p0__7_7_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, v8
+; GFX900-NEXT: v_mov_b32_e32 v11, v9
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v2
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: v_mov_b32_e32 v8, v10
-; GFX900-NEXT: v_mov_b32_e32 v9, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4192,18 +4036,19 @@ define void @v_shuffle_v4p0_v4p0__7_7_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, v8
+; GFX90A-NEXT: v_mov_b32_e32 v11, v9
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v3
-; GFX90A-NEXT: v_mov_b32_e32 v8, v10
-; GFX90A-NEXT: v_mov_b32_e32 v9, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4211,19 +4056,19 @@ define void @v_shuffle_v4p0_v4p0__7_7_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, v8
+; GFX942-NEXT: v_mov_b32_e32 v11, v9
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v3
-; GFX942-NEXT: v_mov_b32_e32 v8, v10
-; GFX942-NEXT: v_mov_b32_e32 v9, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -4237,51 +4082,55 @@ define void @v_shuffle_v4p0_v4p0__7_7_u_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v4p0__7_7_u_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v8, v10
-; GFX900-NEXT: v_mov_b32_e32 v9, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v4p0__7_7_u_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v10
-; GFX90A-NEXT: v_mov_b32_e32 v9, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_u_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v8, v10
-; GFX942-NEXT: v_mov_b32_e32 v9, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -4295,51 +4144,54 @@ define void @v_shuffle_v4p0_v4p0__7_7_0_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v4p0__7_7_0_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v8, v10
-; GFX900-NEXT: v_mov_b32_e32 v9, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v4p0__7_7_0_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v10
-; GFX90A-NEXT: v_mov_b32_e32 v9, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_0_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v8, v10
-; GFX942-NEXT: v_mov_b32_e32 v9, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -4354,18 +4206,18 @@ define void @v_shuffle_v4p0_v4p0__7_7_2_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v14, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v2
-; GFX900-NEXT: v_mov_b32_e32 v7, v3
-; GFX900-NEXT: v_mov_b32_e32 v10, v12
-; GFX900-NEXT: v_mov_b32_e32 v11, v13
-; GFX900-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v12, v8
+; GFX900-NEXT: v_mov_b32_e32 v13, v9
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4373,18 +4225,18 @@ define void @v_shuffle_v4p0_v4p0__7_7_2_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v14, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: v_mov_b32_e32 v10, v12
-; GFX90A-NEXT: v_mov_b32_e32 v11, v13
-; GFX90A-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v12, v8
+; GFX90A-NEXT: v_mov_b32_e32 v13, v9
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4392,19 +4244,18 @@ define void @v_shuffle_v4p0_v4p0__7_7_2_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: v_mov_b32_e32 v10, v12
-; GFX942-NEXT: v_mov_b32_e32 v11, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v12, v8
+; GFX942-NEXT: v_mov_b32_e32 v13, v9
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -4418,19 +4269,20 @@ define void @v_shuffle_v4p0_v4p0__7_7_3_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v4p0__7_7_3_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, v2
+; GFX900-NEXT: v_mov_b32_e32 v9, v3
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v12, v14
-; GFX900-NEXT: v_mov_b32_e32 v13, v15
-; GFX900-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4438,18 +4290,18 @@ define void @v_shuffle_v4p0_v4p0__7_7_3_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v18, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: v_mov_b32_e32 v12, v14
-; GFX90A-NEXT: v_mov_b32_e32 v13, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v2
+; GFX90A-NEXT: v_mov_b32_e32 v9, v3
+; GFX90A-NEXT: v_mov_b32_e32 v16, v14
+; GFX90A-NEXT: v_mov_b32_e32 v17, v15
+; GFX90A-NEXT: global_store_dwordx4 v18, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v18, v[14:17], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4457,18 +4309,18 @@ define void @v_shuffle_v4p0_v4p0__7_7_3_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v18, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: v_mov_b32_e32 v12, v14
-; GFX942-NEXT: v_mov_b32_e32 v13, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v16, v14
+; GFX942-NEXT: v_mov_b32_e32 v8, v2
+; GFX942-NEXT: v_mov_b32_e32 v9, v3
+; GFX942-NEXT: v_mov_b32_e32 v17, v15
+; GFX942-NEXT: global_store_dwordx4 v18, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v18, v[14:17], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -4483,18 +4335,18 @@ define void @v_shuffle_v4p0_v4p0__7_7_4_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v2
-; GFX900-NEXT: v_mov_b32_e32 v7, v3
-; GFX900-NEXT: v_mov_b32_e32 v8, v10
-; GFX900-NEXT: v_mov_b32_e32 v9, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v8
+; GFX900-NEXT: v_mov_b32_e32 v3, v9
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v14, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4502,18 +4354,18 @@ define void @v_shuffle_v4p0_v4p0__7_7_4_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: v_mov_b32_e32 v8, v10
-; GFX90A-NEXT: v_mov_b32_e32 v9, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v8
+; GFX90A-NEXT: v_mov_b32_e32 v3, v9
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v14, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4521,19 +4373,19 @@ define void @v_shuffle_v4p0_v4p0__7_7_4_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: v_mov_b32_e32 v8, v10
-; GFX942-NEXT: v_mov_b32_e32 v9, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v8
+; GFX942-NEXT: v_mov_b32_e32 v3, v9
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v14, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -4548,19 +4400,18 @@ define void @v_shuffle_v4p0_v4p0__7_7_5_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v8, v2
-; GFX900-NEXT: v_mov_b32_e32 v9, v3
-; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v8, v10
-; GFX900-NEXT: v_mov_b32_e32 v9, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v8
+; GFX900-NEXT: v_mov_b32_e32 v5, v9
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4568,19 +4419,18 @@ define void @v_shuffle_v4p0_v4p0__7_7_5_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v2
-; GFX90A-NEXT: v_mov_b32_e32 v9, v3
-; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v10
-; GFX90A-NEXT: v_mov_b32_e32 v9, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v8
+; GFX90A-NEXT: v_mov_b32_e32 v5, v9
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4588,20 +4438,19 @@ define void @v_shuffle_v4p0_v4p0__7_7_5_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v2
-; GFX942-NEXT: v_mov_b32_e32 v9, v3
-; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v8, v10
-; GFX942-NEXT: v_mov_b32_e32 v9, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v8
+; GFX942-NEXT: v_mov_b32_e32 v5, v9
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -4616,18 +4465,19 @@ define void @v_shuffle_v4p0_v4p0__7_7_6_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v8
-; GFX900-NEXT: v_mov_b32_e32 v1, v9
-; GFX900-NEXT: v_mov_b32_e32 v8, v10
-; GFX900-NEXT: v_mov_b32_e32 v9, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v4
+; GFX900-NEXT: v_mov_b32_e32 v9, v5
+; GFX900-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4635,18 +4485,19 @@ define void @v_shuffle_v4p0_v4p0__7_7_6_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v8
-; GFX90A-NEXT: v_mov_b32_e32 v1, v9
-; GFX90A-NEXT: v_mov_b32_e32 v8, v10
-; GFX90A-NEXT: v_mov_b32_e32 v9, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v4
+; GFX90A-NEXT: v_mov_b32_e32 v9, v5
+; GFX90A-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4654,19 +4505,20 @@ define void @v_shuffle_v4p0_v4p0__7_7_6_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v8
-; GFX942-NEXT: v_mov_b32_e32 v1, v9
-; GFX942-NEXT: v_mov_b32_e32 v8, v10
-; GFX942-NEXT: v_mov_b32_e32 v9, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v4
+; GFX942-NEXT: v_mov_b32_e32 v9, v5
+; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -4881,14 +4733,13 @@ define void @v_shuffle_v4p0_v4p0__3_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v4
; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v4
+; GFX900-NEXT: v_mov_b32_e32 v9, v5
+; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4898,14 +4749,13 @@ define void @v_shuffle_v4p0_v4p0__3_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v4
; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v4
+; GFX90A-NEXT: v_mov_b32_e32 v9, v5
+; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4915,14 +4765,13 @@ define void @v_shuffle_v4p0_v4p0__3_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v4
; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v4
+; GFX942-NEXT: v_mov_b32_e32 v9, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -5114,18 +4963,16 @@ define void @v_shuffle_v4p0_v4p0__7_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v4
-; GFX900-NEXT: v_mov_b32_e32 v7, v5
-; GFX900-NEXT: v_mov_b32_e32 v2, v12
-; GFX900-NEXT: v_mov_b32_e32 v3, v13
-; GFX900-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v10, v8
+; GFX900-NEXT: v_mov_b32_e32 v11, v9
+; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5133,18 +4980,16 @@ define void @v_shuffle_v4p0_v4p0__7_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v5
-; GFX90A-NEXT: v_mov_b32_e32 v2, v12
-; GFX90A-NEXT: v_mov_b32_e32 v3, v13
-; GFX90A-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v10, v8
+; GFX90A-NEXT: v_mov_b32_e32 v11, v9
+; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5152,19 +4997,16 @@ define void @v_shuffle_v4p0_v4p0__7_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v5
-; GFX942-NEXT: v_mov_b32_e32 v2, v12
-; GFX942-NEXT: v_mov_b32_e32 v3, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v10, v8
+; GFX942-NEXT: v_mov_b32_e32 v11, v9
+; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -5179,18 +5021,16 @@ define void @v_shuffle_v4p0_v4p0__7_u_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v4
-; GFX900-NEXT: v_mov_b32_e32 v7, v5
-; GFX900-NEXT: v_mov_b32_e32 v0, v12
-; GFX900-NEXT: v_mov_b32_e32 v1, v13
-; GFX900-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v14, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v10, v8
+; GFX900-NEXT: v_mov_b32_e32 v11, v9
+; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5198,18 +5038,16 @@ define void @v_shuffle_v4p0_v4p0__7_u_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v5
-; GFX90A-NEXT: v_mov_b32_e32 v0, v12
-; GFX90A-NEXT: v_mov_b32_e32 v1, v13
-; GFX90A-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v14, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v10, v8
+; GFX90A-NEXT: v_mov_b32_e32 v11, v9
+; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5217,19 +5055,16 @@ define void @v_shuffle_v4p0_v4p0__7_u_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v5
-; GFX942-NEXT: v_mov_b32_e32 v0, v12
-; GFX942-NEXT: v_mov_b32_e32 v1, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v14, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v10, v8
+; GFX942-NEXT: v_mov_b32_e32 v11, v9
+; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -5244,20 +5079,16 @@ define void @v_shuffle_v4p0_v4p0__7_0_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, v6
-; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v14, v12
+; GFX900-NEXT: v_mov_b32_e32 v15, v13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v4
-; GFX900-NEXT: v_mov_b32_e32 v7, v5
-; GFX900-NEXT: v_mov_b32_e32 v10, v0
-; GFX900-NEXT: v_mov_b32_e32 v11, v1
-; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5265,20 +5096,16 @@ define void @v_shuffle_v4p0_v4p0__7_0_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
+; GFX90A-NEXT: v_mov_b32_e32 v14, v12
+; GFX90A-NEXT: v_mov_b32_e32 v15, v13
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v5
-; GFX90A-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v2, v12
-; GFX90A-NEXT: v_mov_b32_e32 v3, v13
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5286,21 +5113,16 @@ define void @v_shuffle_v4p0_v4p0__7_0_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: v_mov_b32_e32 v14, v12
+; GFX942-NEXT: v_mov_b32_e32 v15, v13
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v5
-; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v12
-; GFX942-NEXT: v_mov_b32_e32 v3, v13
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -5315,18 +5137,16 @@ define void @v_shuffle_v4p0_v4p0__7_1_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v14, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v4
-; GFX900-NEXT: v_mov_b32_e32 v7, v5
-; GFX900-NEXT: v_mov_b32_e32 v0, v12
-; GFX900-NEXT: v_mov_b32_e32 v1, v13
-; GFX900-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v14, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v12, v10
+; GFX900-NEXT: v_mov_b32_e32 v13, v11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5334,18 +5154,16 @@ define void @v_shuffle_v4p0_v4p0__7_1_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v14, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v5
-; GFX90A-NEXT: v_mov_b32_e32 v0, v12
-; GFX90A-NEXT: v_mov_b32_e32 v1, v13
-; GFX90A-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v14, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v12, v10
+; GFX90A-NEXT: v_mov_b32_e32 v13, v11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5353,19 +5171,16 @@ define void @v_shuffle_v4p0_v4p0__7_1_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, v10
+; GFX942-NEXT: v_mov_b32_e32 v13, v11
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v5
-; GFX942-NEXT: v_mov_b32_e32 v0, v12
-; GFX942-NEXT: v_mov_b32_e32 v1, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v14, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -5380,19 +5195,19 @@ define void @v_shuffle_v4p0_v4p0__7_3_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v10
+; GFX900-NEXT: v_mov_b32_e32 v9, v11
+; GFX900-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17] offset:16
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v16, v[2:5], s[16:17] offset:16
; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v14
-; GFX900-NEXT: v_mov_b32_e32 v5, v15
-; GFX900-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v12
+; GFX900-NEXT: v_mov_b32_e32 v9, v13
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5400,19 +5215,19 @@ define void @v_shuffle_v4p0_v4p0__7_3_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v10
+; GFX90A-NEXT: v_mov_b32_e32 v9, v11
+; GFX90A-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17] offset:16
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v16, v[2:5], s[16:17] offset:16
; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v14
-; GFX90A-NEXT: v_mov_b32_e32 v5, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v12
+; GFX90A-NEXT: v_mov_b32_e32 v9, v13
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5420,19 +5235,19 @@ define void @v_shuffle_v4p0_v4p0__7_3_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v10
+; GFX942-NEXT: v_mov_b32_e32 v9, v11
+; GFX942-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] offset:16
; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v4, v14
-; GFX942-NEXT: v_mov_b32_e32 v5, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v12
+; GFX942-NEXT: v_mov_b32_e32 v9, v13
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -5447,20 +5262,19 @@ define void @v_shuffle_v4p0_v4p0__7_4_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, v8
+; GFX900-NEXT: v_mov_b32_e32 v11, v9
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, v4
-; GFX900-NEXT: v_mov_b32_e32 v7, v5
-; GFX900-NEXT: v_mov_b32_e32 v0, v14
-; GFX900-NEXT: v_mov_b32_e32 v1, v15
-; GFX900-NEXT: v_mov_b32_e32 v2, v8
-; GFX900-NEXT: v_mov_b32_e32 v3, v9
-; GFX900-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v0
+; GFX900-NEXT: v_mov_b32_e32 v9, v1
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5468,20 +5282,19 @@ define void @v_shuffle_v4p0_v4p0__7_4_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, v8
+; GFX90A-NEXT: v_mov_b32_e32 v11, v9
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v5
-; GFX90A-NEXT: v_mov_b32_e32 v0, v14
-; GFX90A-NEXT: v_mov_b32_e32 v1, v15
-; GFX90A-NEXT: v_mov_b32_e32 v2, v8
-; GFX90A-NEXT: v_mov_b32_e32 v3, v9
-; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v0
+; GFX90A-NEXT: v_mov_b32_e32 v9, v1
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5489,20 +5302,19 @@ define void @v_shuffle_v4p0_v4p0__7_4_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, v8
+; GFX942-NEXT: v_mov_b32_e32 v11, v9
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v5
-; GFX942-NEXT: v_mov_b32_e32 v0, v14
-; GFX942-NEXT: v_mov_b32_e32 v1, v15
-; GFX942-NEXT: v_mov_b32_e32 v2, v8
-; GFX942-NEXT: v_mov_b32_e32 v3, v9
-; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v0
+; GFX942-NEXT: v_mov_b32_e32 v9, v1
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -5517,19 +5329,19 @@ define void @v_shuffle_v4p0_v4p0__7_5_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, v8
+; GFX900-NEXT: v_mov_b32_e32 v11, v9
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v4
-; GFX900-NEXT: v_mov_b32_e32 v7, v5
-; GFX900-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v12
-; GFX900-NEXT: v_mov_b32_e32 v7, v13
-; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v2
+; GFX900-NEXT: v_mov_b32_e32 v9, v3
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5537,19 +5349,19 @@ define void @v_shuffle_v4p0_v4p0__7_5_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, v8
+; GFX90A-NEXT: v_mov_b32_e32 v11, v9
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v5
-; GFX90A-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v12
-; GFX90A-NEXT: v_mov_b32_e32 v7, v13
-; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v2
+; GFX90A-NEXT: v_mov_b32_e32 v9, v3
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5557,20 +5369,19 @@ define void @v_shuffle_v4p0_v4p0__7_5_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, v8
+; GFX942-NEXT: v_mov_b32_e32 v11, v9
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v5
-; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16
; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v6, v12
-; GFX942-NEXT: v_mov_b32_e32 v7, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v2
+; GFX942-NEXT: v_mov_b32_e32 v9, v3
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -5585,18 +5396,19 @@ define void @v_shuffle_v4p0_v4p0__7_6_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, v8
+; GFX900-NEXT: v_mov_b32_e32 v11, v9
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v4
-; GFX900-NEXT: v_mov_b32_e32 v7, v5
-; GFX900-NEXT: v_mov_b32_e32 v8, v12
-; GFX900-NEXT: v_mov_b32_e32 v9, v13
-; GFX900-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v4
+; GFX900-NEXT: v_mov_b32_e32 v9, v5
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5604,18 +5416,19 @@ define void @v_shuffle_v4p0_v4p0__7_6_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, v8
+; GFX90A-NEXT: v_mov_b32_e32 v11, v9
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v5
-; GFX90A-NEXT: v_mov_b32_e32 v8, v12
-; GFX90A-NEXT: v_mov_b32_e32 v9, v13
-; GFX90A-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v4
+; GFX90A-NEXT: v_mov_b32_e32 v9, v5
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5623,19 +5436,19 @@ define void @v_shuffle_v4p0_v4p0__7_6_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, v8
+; GFX942-NEXT: v_mov_b32_e32 v11, v9
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v5
-; GFX942-NEXT: v_mov_b32_e32 v8, v12
-; GFX942-NEXT: v_mov_b32_e32 v9, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v4
+; GFX942-NEXT: v_mov_b32_e32 v9, v5
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -5650,18 +5463,19 @@ define void @v_shuffle_v4p0_v4p0__7_7_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, v8
+; GFX900-NEXT: v_mov_b32_e32 v11, v9
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v4
-; GFX900-NEXT: v_mov_b32_e32 v7, v5
-; GFX900-NEXT: v_mov_b32_e32 v10, v12
-; GFX900-NEXT: v_mov_b32_e32 v11, v13
-; GFX900-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5669,18 +5483,19 @@ define void @v_shuffle_v4p0_v4p0__7_7_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, v8
+; GFX90A-NEXT: v_mov_b32_e32 v11, v9
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v5
-; GFX90A-NEXT: v_mov_b32_e32 v10, v12
-; GFX90A-NEXT: v_mov_b32_e32 v11, v13
-; GFX90A-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5688,19 +5503,19 @@ define void @v_shuffle_v4p0_v4p0__7_7_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, v8
+; GFX942-NEXT: v_mov_b32_e32 v11, v9
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v5
-; GFX942-NEXT: v_mov_b32_e32 v10, v12
-; GFX942-NEXT: v_mov_b32_e32 v11, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -5714,51 +5529,55 @@ define void @v_shuffle_v4p0_v4p0__7_7_u_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v4p0__7_7_u_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
-; GFX900-NEXT: v_mov_b32_e32 v10, v12
-; GFX900-NEXT: v_mov_b32_e32 v11, v13
-; GFX900-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v4p0__7_7_u_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
-; GFX90A-NEXT: v_mov_b32_e32 v10, v12
-; GFX90A-NEXT: v_mov_b32_e32 v11, v13
-; GFX90A-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
-; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_u_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v10, v12
-; GFX942-NEXT: v_mov_b32_e32 v11, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -5773,18 +5592,19 @@ define void @v_shuffle_v4p0_v4p0__7_7_0_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: v_mov_b32_e32 v10, v12
; GFX900-NEXT: v_mov_b32_e32 v11, v13
-; GFX900-NEXT: global_store_dwordx4 v14, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5792,18 +5612,19 @@ define void @v_shuffle_v4p0_v4p0__7_7_0_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: v_mov_b32_e32 v10, v12
; GFX90A-NEXT: v_mov_b32_e32 v11, v13
-; GFX90A-NEXT: global_store_dwordx4 v14, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5811,18 +5632,19 @@ define void @v_shuffle_v4p0_v4p0__7_7_0_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: v_mov_b32_e32 v10, v12
; GFX942-NEXT: v_mov_b32_e32 v11, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -5836,51 +5658,54 @@ define void @v_shuffle_v4p0_v4p0__7_7_1_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v4p0__7_7_1_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
-; GFX900-NEXT: v_mov_b32_e32 v10, v12
-; GFX900-NEXT: v_mov_b32_e32 v11, v13
-; GFX900-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v4p0__7_7_1_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
-; GFX90A-NEXT: v_mov_b32_e32 v10, v12
-; GFX90A-NEXT: v_mov_b32_e32 v11, v13
-; GFX90A-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_1_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
-; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v10, v12
-; GFX942-NEXT: v_mov_b32_e32 v11, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -5894,19 +5719,20 @@ define void @v_shuffle_v4p0_v4p0__7_7_3_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v4p0__7_7_3_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, v4
+; GFX900-NEXT: v_mov_b32_e32 v9, v5
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: v_mov_b32_e32 v12, v14
-; GFX900-NEXT: v_mov_b32_e32 v13, v15
-; GFX900-NEXT: global_store_dwordx4 v16, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5914,18 +5740,18 @@ define void @v_shuffle_v4p0_v4p0__7_7_3_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v18, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v12, v14
-; GFX90A-NEXT: v_mov_b32_e32 v13, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v4
+; GFX90A-NEXT: v_mov_b32_e32 v9, v5
+; GFX90A-NEXT: v_mov_b32_e32 v16, v14
+; GFX90A-NEXT: v_mov_b32_e32 v17, v15
+; GFX90A-NEXT: global_store_dwordx4 v18, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v18, v[14:17], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5933,18 +5759,18 @@ define void @v_shuffle_v4p0_v4p0__7_7_3_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v18, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v12, v14
-; GFX942-NEXT: v_mov_b32_e32 v13, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v16, v14
+; GFX942-NEXT: v_mov_b32_e32 v8, v4
+; GFX942-NEXT: v_mov_b32_e32 v9, v5
+; GFX942-NEXT: v_mov_b32_e32 v17, v15
+; GFX942-NEXT: global_store_dwordx4 v18, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v18, v[14:17], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -5959,18 +5785,18 @@ define void @v_shuffle_v4p0_v4p0__7_7_4_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
-; GFX900-NEXT: v_mov_b32_e32 v8, v4
-; GFX900-NEXT: v_mov_b32_e32 v9, v5
-; GFX900-NEXT: v_mov_b32_e32 v10, v12
-; GFX900-NEXT: v_mov_b32_e32 v11, v13
-; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v8
+; GFX900-NEXT: v_mov_b32_e32 v3, v9
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5978,18 +5804,18 @@ define void @v_shuffle_v4p0_v4p0__7_7_4_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v4
-; GFX90A-NEXT: v_mov_b32_e32 v9, v5
-; GFX90A-NEXT: v_mov_b32_e32 v10, v12
-; GFX90A-NEXT: v_mov_b32_e32 v11, v13
-; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v8
+; GFX90A-NEXT: v_mov_b32_e32 v3, v9
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5997,19 +5823,19 @@ define void @v_shuffle_v4p0_v4p0__7_7_4_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v4
-; GFX942-NEXT: v_mov_b32_e32 v9, v5
-; GFX942-NEXT: v_mov_b32_e32 v10, v12
-; GFX942-NEXT: v_mov_b32_e32 v11, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v8
+; GFX942-NEXT: v_mov_b32_e32 v3, v9
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -6024,19 +5850,18 @@ define void @v_shuffle_v4p0_v4p0__7_7_5_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
-; GFX900-NEXT: v_mov_b32_e32 v10, v4
-; GFX900-NEXT: v_mov_b32_e32 v11, v5
-; GFX900-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v10, v12
-; GFX900-NEXT: v_mov_b32_e32 v11, v13
-; GFX900-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v8
+; GFX900-NEXT: v_mov_b32_e32 v5, v9
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6044,19 +5869,18 @@ define void @v_shuffle_v4p0_v4p0__7_7_5_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
-; GFX90A-NEXT: v_mov_b32_e32 v10, v4
-; GFX90A-NEXT: v_mov_b32_e32 v11, v5
-; GFX90A-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v10, v12
-; GFX90A-NEXT: v_mov_b32_e32 v11, v13
-; GFX90A-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v8
+; GFX90A-NEXT: v_mov_b32_e32 v5, v9
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6064,20 +5888,19 @@ define void @v_shuffle_v4p0_v4p0__7_7_5_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v4
-; GFX942-NEXT: v_mov_b32_e32 v11, v5
-; GFX942-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v10, v12
-; GFX942-NEXT: v_mov_b32_e32 v11, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v8
+; GFX942-NEXT: v_mov_b32_e32 v5, v9
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -6092,18 +5915,19 @@ define void @v_shuffle_v4p0_v4p0__7_7_6_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v14, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v10
-; GFX900-NEXT: v_mov_b32_e32 v3, v11
-; GFX900-NEXT: v_mov_b32_e32 v10, v12
-; GFX900-NEXT: v_mov_b32_e32 v11, v13
-; GFX900-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, v4
+; GFX900-NEXT: v_mov_b32_e32 v9, v5
+; GFX900-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6111,18 +5935,19 @@ define void @v_shuffle_v4p0_v4p0__7_7_6_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v14, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v10
-; GFX90A-NEXT: v_mov_b32_e32 v3, v11
-; GFX90A-NEXT: v_mov_b32_e32 v10, v12
-; GFX90A-NEXT: v_mov_b32_e32 v11, v13
-; GFX90A-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, v4
+; GFX90A-NEXT: v_mov_b32_e32 v9, v5
+; GFX90A-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6130,19 +5955,20 @@ define void @v_shuffle_v4p0_v4p0__7_7_6_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v10
-; GFX942-NEXT: v_mov_b32_e32 v3, v11
-; GFX942-NEXT: v_mov_b32_e32 v10, v12
-; GFX942-NEXT: v_mov_b32_e32 v11, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v4
+; GFX942-NEXT: v_mov_b32_e32 v9, v5
+; GFX942-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -6156,42 +5982,43 @@ define void @v_shuffle_v4p0_v4p0__u_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v4p0__u_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v4p0__u_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v4p0__u_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -6207,13 +6034,13 @@ define void @v_shuffle_v4p0_v4p0__0_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
; GFX900-NEXT: v_mov_b32_e32 v2, v6
; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6223,13 +6050,13 @@ define void @v_shuffle_v4p0_v4p0__0_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
; GFX90A-NEXT: v_mov_b32_e32 v2, v6
; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6239,13 +6066,13 @@ define void @v_shuffle_v4p0_v4p0__0_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
; GFX942-NEXT: v_mov_b32_e32 v2, v6
; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -6261,11 +6088,13 @@ define void @v_shuffle_v4p0_v4p0__1_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
; GFX900-NEXT: v_mov_b32_e32 v4, v6
; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6275,11 +6104,13 @@ define void @v_shuffle_v4p0_v4p0__1_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
; GFX90A-NEXT: v_mov_b32_e32 v4, v6
; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6289,11 +6120,13 @@ define void @v_shuffle_v4p0_v4p0__1_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
; GFX942-NEXT: v_mov_b32_e32 v4, v6
; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -6306,48 +6139,43 @@ define void @v_shuffle_v4p0_v4p0__2_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v4p0__2_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v4p0__2_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v4p0__2_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -6360,42 +6188,43 @@ define void @v_shuffle_v4p0_v4p0__3_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v4p0__3_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v4p0__3_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v4p0__3_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -6408,42 +6237,43 @@ define void @v_shuffle_v4p0_v4p0__4_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v4p0__4_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v4p0__4_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v4p0__4_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -6457,17 +6287,17 @@ define void @v_shuffle_v4p0_v4p0__5_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v16, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
; GFX900-NEXT: v_mov_b32_e32 v12, v6
; GFX900-NEXT: v_mov_b32_e32 v13, v7
-; GFX900-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17] offset:16
; GFX900-NEXT: global_store_dwordx4 v16, v[10:13], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
@@ -6476,17 +6306,17 @@ define void @v_shuffle_v4p0_v4p0__5_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
; GFX90A-NEXT: v_mov_b32_e32 v12, v6
; GFX90A-NEXT: v_mov_b32_e32 v13, v7
-; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17] offset:16
; GFX90A-NEXT: global_store_dwordx4 v16, v[10:13], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
@@ -6495,17 +6325,18 @@ define void @v_shuffle_v4p0_v4p0__5_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
; GFX942-NEXT: v_mov_b32_e32 v12, v6
; GFX942-NEXT: v_mov_b32_e32 v13, v7
-; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -6521,17 +6352,17 @@ define void @v_shuffle_v4p0_v4p0__6_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v16, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
; GFX900-NEXT: v_mov_b32_e32 v14, v6
; GFX900-NEXT: v_mov_b32_e32 v15, v7
-; GFX900-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17] offset:16
; GFX900-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
@@ -6540,17 +6371,17 @@ define void @v_shuffle_v4p0_v4p0__6_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
; GFX90A-NEXT: v_mov_b32_e32 v14, v6
; GFX90A-NEXT: v_mov_b32_e32 v15, v7
-; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17] offset:16
; GFX90A-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
@@ -6559,17 +6390,18 @@ define void @v_shuffle_v4p0_v4p0__6_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
; GFX942-NEXT: v_mov_b32_e32 v14, v6
; GFX942-NEXT: v_mov_b32_e32 v15, v7
-; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -6585,19 +6417,16 @@ define void @v_shuffle_v4p0_v4p0__7_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v14
-; GFX900-NEXT: v_mov_b32_e32 v5, v15
-; GFX900-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v10, v8
+; GFX900-NEXT: v_mov_b32_e32 v11, v9
+; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6605,19 +6434,16 @@ define void @v_shuffle_v4p0_v4p0__7_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v14
-; GFX90A-NEXT: v_mov_b32_e32 v5, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v10, v8
+; GFX90A-NEXT: v_mov_b32_e32 v11, v9
+; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6625,19 +6451,16 @@ define void @v_shuffle_v4p0_v4p0__7_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v4, v14
-; GFX942-NEXT: v_mov_b32_e32 v5, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v10, v8
+; GFX942-NEXT: v_mov_b32_e32 v11, v9
+; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -6651,19 +6474,18 @@ define void @v_shuffle_v4p0_v4p0__7_u_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v4p0__7_u_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: v_mov_b32_e32 v0, v14
-; GFX900-NEXT: v_mov_b32_e32 v1, v15
-; GFX900-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6671,18 +6493,16 @@ define void @v_shuffle_v4p0_v4p0__7_u_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: v_mov_b32_e32 v0, v14
-; GFX90A-NEXT: v_mov_b32_e32 v1, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v16, v[14:17], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6690,18 +6510,17 @@ define void @v_shuffle_v4p0_v4p0__7_u_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: v_mov_b32_e32 v0, v14
-; GFX942-NEXT: v_mov_b32_e32 v1, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v16, v[14:17], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -6718,60 +6537,49 @@ define void @v_shuffle_v4p0_v4p0__7_0_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, v6
-; GFX900-NEXT: v_mov_b32_e32 v9, v7
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: v_mov_b32_e32 v10, v0
-; GFX900-NEXT: v_mov_b32_e32 v11, v1
-; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v16, v14
+; GFX900-NEXT: v_mov_b32_e32 v17, v15
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
+; GFX900-NEXT: global_store_dwordx4 v0, v[14:17], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v0, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v4p0__7_0_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v18, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
+; GFX90A-NEXT: v_mov_b32_e32 v16, v14
+; GFX90A-NEXT: v_mov_b32_e32 v17, v15
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v2, v14
-; GFX90A-NEXT: v_mov_b32_e32 v3, v15
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v16, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v18, v[14:17], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v18, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_0_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v18, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v14
-; GFX942-NEXT: v_mov_b32_e32 v3, v15
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v16, v14
+; GFX942-NEXT: v_mov_b32_e32 v17, v15
+; GFX942-NEXT: global_store_dwordx4 v18, v[14:17], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v18, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -6785,57 +6593,52 @@ define void @v_shuffle_v4p0_v4p0__7_1_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v4p0__7_1_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v14, v12
+; GFX900-NEXT: v_mov_b32_e32 v15, v13
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: v_mov_b32_e32 v0, v14
-; GFX900-NEXT: v_mov_b32_e32 v1, v15
-; GFX900-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v4p0__7_1_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
+; GFX90A-NEXT: v_mov_b32_e32 v14, v12
+; GFX90A-NEXT: v_mov_b32_e32 v15, v13
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: v_mov_b32_e32 v0, v14
-; GFX90A-NEXT: v_mov_b32_e32 v1, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_1_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v14, v12
+; GFX942-NEXT: v_mov_b32_e32 v15, v13
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: v_mov_b32_e32 v0, v14
-; GFX942-NEXT: v_mov_b32_e32 v1, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -6849,66 +6652,52 @@ define void @v_shuffle_v4p0_v4p0__7_2_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v4p0__7_2_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: global_store_dwordx4 v14, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: v_mov_b32_e32 v12, v10
+; GFX900-NEXT: v_mov_b32_e32 v13, v11
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v12
-; GFX900-NEXT: v_mov_b32_e32 v3, v13
-; GFX900-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v4p0__7_2_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
+; GFX90A-NEXT: v_mov_b32_e32 v12, v10
+; GFX90A-NEXT: v_mov_b32_e32 v13, v11
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v14
-; GFX90A-NEXT: v_mov_b32_e32 v3, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_2_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v12, v10
+; GFX942-NEXT: v_mov_b32_e32 v13, v11
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v14
-; GFX942-NEXT: v_mov_b32_e32 v3, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -6922,63 +6711,59 @@ define void @v_shuffle_v4p0_v4p0__7_4_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v4p0__7_4_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: v_mov_b32_e32 v0, v14
-; GFX900-NEXT: v_mov_b32_e32 v1, v15
-; GFX900-NEXT: v_mov_b32_e32 v2, v8
-; GFX900-NEXT: v_mov_b32_e32 v3, v9
-; GFX900-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v0
+; GFX900-NEXT: v_mov_b32_e32 v9, v1
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v4p0__7_4_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v20, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[10:17]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: v_mov_b32_e32 v0, v14
-; GFX90A-NEXT: v_mov_b32_e32 v1, v15
-; GFX90A-NEXT: v_mov_b32_e32 v2, v8
-; GFX90A-NEXT: v_mov_b32_e32 v3, v9
-; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: v_mov_b32_e32 v18, v10
+; GFX90A-NEXT: v_mov_b32_e32 v19, v11
+; GFX90A-NEXT: global_store_dwordx4 v20, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v20, v[16:19], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_4_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v20, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[10:17]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: v_mov_b32_e32 v0, v14
-; GFX942-NEXT: v_mov_b32_e32 v1, v15
-; GFX942-NEXT: v_mov_b32_e32 v2, v8
-; GFX942-NEXT: v_mov_b32_e32 v3, v9
-; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: v_mov_b32_e32 v18, v10
+; GFX942-NEXT: v_mov_b32_e32 v19, v11
+; GFX942-NEXT: global_store_dwordx4 v20, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v20, v[16:19], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -6992,19 +6777,20 @@ define void @v_shuffle_v4p0_v4p0__7_5_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v4p0__7_5_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: v_mov_b32_e32 v8, v14
-; GFX900-NEXT: v_mov_b32_e32 v9, v15
-; GFX900-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v2
+; GFX900-NEXT: v_mov_b32_e32 v9, v3
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7012,18 +6798,18 @@ define void @v_shuffle_v4p0_v4p0__7_5_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v18, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: v_mov_b32_e32 v8, v14
-; GFX90A-NEXT: v_mov_b32_e32 v9, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: v_mov_b32_e32 v16, v10
+; GFX90A-NEXT: v_mov_b32_e32 v17, v11
+; GFX90A-NEXT: global_store_dwordx4 v18, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v18, v[14:17], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7031,18 +6817,18 @@ define void @v_shuffle_v4p0_v4p0__7_5_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v18, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: v_mov_b32_e32 v8, v14
-; GFX942-NEXT: v_mov_b32_e32 v9, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v16, v10
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: v_mov_b32_e32 v17, v11
+; GFX942-NEXT: global_store_dwordx4 v18, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v18, v[14:17], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -7056,19 +6842,20 @@ define void @v_shuffle_v4p0_v4p0__7_6_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v4p0__7_6_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: v_mov_b32_e32 v10, v14
-; GFX900-NEXT: v_mov_b32_e32 v11, v15
-; GFX900-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[10:13], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v4
+; GFX900-NEXT: v_mov_b32_e32 v9, v5
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7076,18 +6863,18 @@ define void @v_shuffle_v4p0_v4p0__7_6_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v18, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: v_mov_b32_e32 v10, v14
-; GFX90A-NEXT: v_mov_b32_e32 v11, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[10:13], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: v_mov_b32_e32 v16, v12
+; GFX90A-NEXT: v_mov_b32_e32 v17, v13
+; GFX90A-NEXT: global_store_dwordx4 v18, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v18, v[14:17], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7095,18 +6882,18 @@ define void @v_shuffle_v4p0_v4p0__7_6_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v18, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: v_mov_b32_e32 v10, v14
-; GFX942-NEXT: v_mov_b32_e32 v11, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v16, v12
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: v_mov_b32_e32 v17, v13
+; GFX942-NEXT: global_store_dwordx4 v18, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v18, v[14:17], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -7120,19 +6907,20 @@ define void @v_shuffle_v4p0_v4p0__7_7_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v4p0__7_7_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: v_mov_b32_e32 v12, v14
-; GFX900-NEXT: v_mov_b32_e32 v13, v15
-; GFX900-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7140,18 +6928,18 @@ define void @v_shuffle_v4p0_v4p0__7_7_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v18, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: v_mov_b32_e32 v12, v14
-; GFX90A-NEXT: v_mov_b32_e32 v13, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: v_mov_b32_e32 v16, v14
+; GFX90A-NEXT: v_mov_b32_e32 v17, v15
+; GFX90A-NEXT: global_store_dwordx4 v18, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v18, v[14:17], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7159,18 +6947,18 @@ define void @v_shuffle_v4p0_v4p0__7_7_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v18, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: v_mov_b32_e32 v12, v14
-; GFX942-NEXT: v_mov_b32_e32 v13, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v16, v14
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: v_mov_b32_e32 v17, v15
+; GFX942-NEXT: global_store_dwordx4 v18, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v18, v[14:17], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -7184,51 +6972,55 @@ define void @v_shuffle_v4p0_v4p0__7_7_u_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v4p0__7_7_u_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
-; GFX900-NEXT: v_mov_b32_e32 v12, v14
-; GFX900-NEXT: v_mov_b32_e32 v13, v15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v4p0__7_7_u_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v12, v14
-; GFX90A-NEXT: v_mov_b32_e32 v13, v15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_u_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v14
-; GFX942-NEXT: v_mov_b32_e32 v13, v15
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -7243,18 +7035,19 @@ define void @v_shuffle_v4p0_v4p0__7_7_0_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v16, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: v_mov_b32_e32 v12, v14
-; GFX900-NEXT: v_mov_b32_e32 v13, v15
-; GFX900-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v10, v14
+; GFX900-NEXT: v_mov_b32_e32 v11, v15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7262,18 +7055,19 @@ define void @v_shuffle_v4p0_v4p0__7_7_0_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v12, v14
-; GFX90A-NEXT: v_mov_b32_e32 v13, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v10, v14
+; GFX90A-NEXT: v_mov_b32_e32 v11, v15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7281,18 +7075,19 @@ define void @v_shuffle_v4p0_v4p0__7_7_0_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v12, v14
-; GFX942-NEXT: v_mov_b32_e32 v13, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v10, v14
+; GFX942-NEXT: v_mov_b32_e32 v11, v15
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -7307,18 +7102,19 @@ define void @v_shuffle_v4p0_v4p0__7_7_1_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, v12
+; GFX900-NEXT: v_mov_b32_e32 v11, v13
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: v_mov_b32_e32 v12, v14
-; GFX900-NEXT: v_mov_b32_e32 v13, v15
-; GFX900-NEXT: global_store_dwordx4 v16, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7326,18 +7122,19 @@ define void @v_shuffle_v4p0_v4p0__7_7_1_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, v12
+; GFX90A-NEXT: v_mov_b32_e32 v11, v13
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: v_mov_b32_e32 v12, v14
-; GFX90A-NEXT: v_mov_b32_e32 v13, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7345,18 +7142,19 @@ define void @v_shuffle_v4p0_v4p0__7_7_1_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, v12
+; GFX942-NEXT: v_mov_b32_e32 v11, v13
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: v_mov_b32_e32 v12, v14
-; GFX942-NEXT: v_mov_b32_e32 v13, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -7370,51 +7168,54 @@ define void @v_shuffle_v4p0_v4p0__7_7_2_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v4p0__7_7_2_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
-; GFX900-NEXT: v_mov_b32_e32 v12, v14
-; GFX900-NEXT: v_mov_b32_e32 v13, v15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v4p0__7_7_2_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v12, v14
-; GFX90A-NEXT: v_mov_b32_e32 v13, v15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_2_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v14
-; GFX942-NEXT: v_mov_b32_e32 v13, v15
+; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -7429,18 +7230,18 @@ define void @v_shuffle_v4p0_v4p0__7_7_4_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, v6
-; GFX900-NEXT: v_mov_b32_e32 v11, v7
-; GFX900-NEXT: v_mov_b32_e32 v12, v14
-; GFX900-NEXT: v_mov_b32_e32 v13, v15
-; GFX900-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v8
+; GFX900-NEXT: v_mov_b32_e32 v3, v9
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7448,18 +7249,18 @@ define void @v_shuffle_v4p0_v4p0__7_7_4_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, v6
-; GFX90A-NEXT: v_mov_b32_e32 v11, v7
-; GFX90A-NEXT: v_mov_b32_e32 v12, v14
-; GFX90A-NEXT: v_mov_b32_e32 v13, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v8
+; GFX90A-NEXT: v_mov_b32_e32 v3, v9
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7467,18 +7268,19 @@ define void @v_shuffle_v4p0_v4p0__7_7_4_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, v14
-; GFX942-NEXT: v_mov_b32_e32 v10, v6
-; GFX942-NEXT: v_mov_b32_e32 v11, v7
-; GFX942-NEXT: v_mov_b32_e32 v13, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v8
+; GFX942-NEXT: v_mov_b32_e32 v3, v9
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -7493,19 +7295,18 @@ define void @v_shuffle_v4p0_v4p0__7_7_5_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, v6
-; GFX900-NEXT: v_mov_b32_e32 v13, v7
-; GFX900-NEXT: global_store_dwordx4 v16, v[10:13], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v12, v14
-; GFX900-NEXT: v_mov_b32_e32 v13, v15
-; GFX900-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v8
+; GFX900-NEXT: v_mov_b32_e32 v5, v9
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7513,19 +7314,18 @@ define void @v_shuffle_v4p0_v4p0__7_7_5_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, v6
-; GFX90A-NEXT: v_mov_b32_e32 v13, v7
-; GFX90A-NEXT: global_store_dwordx4 v16, v[10:13], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v12, v14
-; GFX90A-NEXT: v_mov_b32_e32 v13, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v8
+; GFX90A-NEXT: v_mov_b32_e32 v5, v9
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7533,20 +7333,19 @@ define void @v_shuffle_v4p0_v4p0__7_7_5_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v6
-; GFX942-NEXT: v_mov_b32_e32 v13, v7
-; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v12, v14
-; GFX942-NEXT: v_mov_b32_e32 v13, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v8
+; GFX942-NEXT: v_mov_b32_e32 v5, v9
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -7561,18 +7360,19 @@ define void @v_shuffle_v4p0_v4p0__7_7_6_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v12
-; GFX900-NEXT: v_mov_b32_e32 v5, v13
-; GFX900-NEXT: v_mov_b32_e32 v12, v14
-; GFX900-NEXT: v_mov_b32_e32 v13, v15
-; GFX900-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v4
+; GFX900-NEXT: v_mov_b32_e32 v9, v5
+; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7580,18 +7380,19 @@ define void @v_shuffle_v4p0_v4p0__7_7_6_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v12
-; GFX90A-NEXT: v_mov_b32_e32 v5, v13
-; GFX90A-NEXT: v_mov_b32_e32 v12, v14
-; GFX90A-NEXT: v_mov_b32_e32 v13, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v4
+; GFX90A-NEXT: v_mov_b32_e32 v9, v5
+; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7599,18 +7400,20 @@ define void @v_shuffle_v4p0_v4p0__7_7_6_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v12
-; GFX942-NEXT: v_mov_b32_e32 v5, v13
-; GFX942-NEXT: v_mov_b32_e32 v12, v14
-; GFX942-NEXT: v_mov_b32_e32 v13, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v4
+; GFX942-NEXT: v_mov_b32_e32 v9, v5
+; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -7755,39 +7558,33 @@ define void @v_shuffle_v4p0_v4p0__3_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v4p0__3_4_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v4p0__3_4_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v4p0__3_4_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -7812,15 +7609,15 @@ define void @v_shuffle_v4p0_v4p0__5_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: v_mov_b32_e32 v6, v0
-; GFX900-NEXT: v_mov_b32_e32 v7, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
+; GFX900-NEXT: v_mov_b32_e32 v0, v2
+; GFX900-NEXT: v_mov_b32_e32 v1, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: v_mov_b32_e32 v7, v3
+; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7828,15 +7625,15 @@ define void @v_shuffle_v4p0_v4p0__5_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: v_mov_b32_e32 v7, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v2
+; GFX90A-NEXT: v_mov_b32_e32 v1, v3
+; GFX90A-NEXT: v_mov_b32_e32 v6, v2
+; GFX90A-NEXT: v_mov_b32_e32 v7, v3
+; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7844,15 +7641,15 @@ define void @v_shuffle_v4p0_v4p0__5_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v2
+; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: v_mov_b32_e32 v6, v2
+; GFX942-NEXT: v_mov_b32_e32 v7, v3
+; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -7924,15 +7721,13 @@ define void @v_shuffle_v4p0_v4p0__7_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v0
; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v0
+; GFX900-NEXT: v_mov_b32_e32 v9, v1
+; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7942,15 +7737,13 @@ define void @v_shuffle_v4p0_v4p0__7_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v0
; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v0
+; GFX90A-NEXT: v_mov_b32_e32 v9, v1
+; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7960,15 +7753,13 @@ define void @v_shuffle_v4p0_v4p0__7_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v0
; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v0
+; GFX942-NEXT: v_mov_b32_e32 v9, v1
+; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -7989,10 +7780,7 @@ define void @v_shuffle_v4p0_v4p0__7_u_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: v_mov_b32_e32 v2, v0
; GFX900-NEXT: v_mov_b32_e32 v3, v1
; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -8006,10 +7794,7 @@ define void @v_shuffle_v4p0_v4p0__7_u_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: v_mov_b32_e32 v2, v0
; GFX90A-NEXT: v_mov_b32_e32 v3, v1
; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -8023,10 +7808,7 @@ define void @v_shuffle_v4p0_v4p0__7_u_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: v_mov_b32_e32 v2, v0
; GFX942-NEXT: v_mov_b32_e32 v3, v1
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -8043,18 +7825,14 @@ define void @v_shuffle_v4p0_v4p0__7_0_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v0
; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v8, v6
-; GFX900-NEXT: v_mov_b32_e32 v9, v7
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, v0
-; GFX900-NEXT: v_mov_b32_e32 v11, v1
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -8064,19 +7842,14 @@ define void @v_shuffle_v4p0_v4p0__7_0_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v1
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:9]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v3
-; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v8
-; GFX90A-NEXT: v_mov_b32_e32 v3, v9
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -8086,20 +7859,14 @@ define void @v_shuffle_v4p0_v4p0__7_0_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v0
+; GFX942-NEXT: v_mov_b32_e32 v3, v1
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:9]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v3
-; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v8
-; GFX942-NEXT: v_mov_b32_e32 v3, v9
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -8114,18 +7881,16 @@ define void @v_shuffle_v4p0_v4p0__7_1_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v4
-; GFX900-NEXT: v_mov_b32_e32 v7, v5
-; GFX900-NEXT: v_mov_b32_e32 v0, v10
-; GFX900-NEXT: v_mov_b32_e32 v1, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: global_store_dwordx4 v14, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -8133,18 +7898,16 @@ define void @v_shuffle_v4p0_v4p0__7_1_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v5
-; GFX90A-NEXT: v_mov_b32_e32 v0, v10
-; GFX90A-NEXT: v_mov_b32_e32 v1, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: global_store_dwordx4 v14, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -8152,19 +7915,17 @@ define void @v_shuffle_v4p0_v4p0__7_1_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v5
-; GFX942-NEXT: v_mov_b32_e32 v0, v10
-; GFX942-NEXT: v_mov_b32_e32 v1, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v0
+; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: global_store_dwordx4 v14, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -8179,18 +7940,16 @@ define void @v_shuffle_v4p0_v4p0__7_2_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
-; GFX900-NEXT: v_mov_b32_e32 v8, v6
-; GFX900-NEXT: v_mov_b32_e32 v9, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v12
-; GFX900-NEXT: v_mov_b32_e32 v3, v13
-; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -8198,18 +7957,16 @@ define void @v_shuffle_v4p0_v4p0__7_2_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v6
-; GFX90A-NEXT: v_mov_b32_e32 v9, v7
-; GFX90A-NEXT: v_mov_b32_e32 v2, v12
-; GFX90A-NEXT: v_mov_b32_e32 v3, v13
-; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -8217,19 +7974,17 @@ define void @v_shuffle_v4p0_v4p0__7_2_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v6
-; GFX942-NEXT: v_mov_b32_e32 v9, v7
-; GFX942-NEXT: v_mov_b32_e32 v2, v12
-; GFX942-NEXT: v_mov_b32_e32 v3, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v0
+; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -8244,18 +7999,16 @@ define void @v_shuffle_v4p0_v4p0__7_3_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
-; GFX900-NEXT: v_mov_b32_e32 v10, v8
-; GFX900-NEXT: v_mov_b32_e32 v11, v9
-; GFX900-NEXT: v_mov_b32_e32 v4, v14
-; GFX900-NEXT: v_mov_b32_e32 v5, v15
-; GFX900-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -8263,18 +8016,16 @@ define void @v_shuffle_v4p0_v4p0__7_3_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v10, v8
-; GFX90A-NEXT: v_mov_b32_e32 v11, v9
-; GFX90A-NEXT: v_mov_b32_e32 v4, v14
-; GFX90A-NEXT: v_mov_b32_e32 v5, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -8282,18 +8033,17 @@ define void @v_shuffle_v4p0_v4p0__7_3_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v8
-; GFX942-NEXT: v_mov_b32_e32 v11, v9
-; GFX942-NEXT: v_mov_b32_e32 v4, v14
-; GFX942-NEXT: v_mov_b32_e32 v5, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v0
+; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -8307,54 +8057,49 @@ define void @v_shuffle_v4p0_v4p0__7_5_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v4p0__7_5_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v8, v0
-; GFX900-NEXT: v_mov_b32_e32 v9, v1
-; GFX900-NEXT: v_mov_b32_e32 v10, v0
-; GFX900-NEXT: v_mov_b32_e32 v11, v1
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, v2
+; GFX900-NEXT: v_mov_b32_e32 v1, v3
+; GFX900-NEXT: v_mov_b32_e32 v10, v4
+; GFX900-NEXT: v_mov_b32_e32 v11, v5
+; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v4p0__7_5_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v10, v0
-; GFX90A-NEXT: v_mov_b32_e32 v11, v1
-; GFX90A-NEXT: v_mov_b32_e32 v12, v0
-; GFX90A-NEXT: v_mov_b32_e32 v13, v1
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[10:13], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v2
+; GFX90A-NEXT: v_mov_b32_e32 v1, v3
+; GFX90A-NEXT: v_mov_b32_e32 v10, v4
+; GFX90A-NEXT: v_mov_b32_e32 v11, v5
+; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_5_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v0
-; GFX942-NEXT: v_mov_b32_e32 v11, v1
-; GFX942-NEXT: v_mov_b32_e32 v12, v0
-; GFX942-NEXT: v_mov_b32_e32 v13, v1
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[10:13], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v2
+; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: v_mov_b32_e32 v10, v4
+; GFX942-NEXT: v_mov_b32_e32 v11, v5
+; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -8371,14 +8116,13 @@ define void @v_shuffle_v4p0_v4p0__7_6_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v0
; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v4
+; GFX900-NEXT: v_mov_b32_e32 v9, v5
+; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -8388,14 +8132,13 @@ define void @v_shuffle_v4p0_v4p0__7_6_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v0
; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v4
+; GFX90A-NEXT: v_mov_b32_e32 v9, v5
+; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -8405,14 +8148,13 @@ define void @v_shuffle_v4p0_v4p0__7_6_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v0
; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v4
+; GFX942-NEXT: v_mov_b32_e32 v9, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -8429,13 +8171,13 @@ define void @v_shuffle_v4p0_v4p0__7_7_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v0
; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -8445,13 +8187,13 @@ define void @v_shuffle_v4p0_v4p0__7_7_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v0
; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -8461,13 +8203,13 @@ define void @v_shuffle_v4p0_v4p0__7_7_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v0
; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -8481,48 +8223,42 @@ define void @v_shuffle_v4p0_v4p0__7_7_u_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v4p0__7_7_u_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v10, v8
+; GFX900-NEXT: v_mov_b32_e32 v11, v9
+; GFX900-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v0, v[8:11], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v4p0__7_7_u_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v10, v8
+; GFX90A-NEXT: v_mov_b32_e32 v11, v9
+; GFX90A-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v0, v[8:11], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_u_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v0, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v10, v8
+; GFX942-NEXT: v_mov_b32_e32 v11, v9
+; GFX942-NEXT: global_store_dwordx4 v0, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -8539,14 +8275,14 @@ define void @v_shuffle_v4p0_v4p0__7_7_0_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v8
-; GFX900-NEXT: v_mov_b32_e32 v7, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v10, v8
+; GFX900-NEXT: v_mov_b32_e32 v11, v9
+; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -8556,14 +8292,14 @@ define void @v_shuffle_v4p0_v4p0__7_7_0_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v8
-; GFX90A-NEXT: v_mov_b32_e32 v7, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v10, v8
+; GFX90A-NEXT: v_mov_b32_e32 v11, v9
+; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -8573,14 +8309,14 @@ define void @v_shuffle_v4p0_v4p0__7_7_0_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v6, v8
-; GFX942-NEXT: v_mov_b32_e32 v7, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v10, v8
+; GFX942-NEXT: v_mov_b32_e32 v11, v9
+; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -8597,14 +8333,14 @@ define void @v_shuffle_v4p0_v4p0__7_7_1_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v8, v10
-; GFX900-NEXT: v_mov_b32_e32 v9, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v12, v10
+; GFX900-NEXT: v_mov_b32_e32 v13, v11
+; GFX900-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -8614,14 +8350,14 @@ define void @v_shuffle_v4p0_v4p0__7_7_1_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v10
-; GFX90A-NEXT: v_mov_b32_e32 v9, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v12, v10
+; GFX90A-NEXT: v_mov_b32_e32 v13, v11
+; GFX90A-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -8631,14 +8367,14 @@ define void @v_shuffle_v4p0_v4p0__7_7_1_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v8, v10
-; GFX942-NEXT: v_mov_b32_e32 v9, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v12, v10
+; GFX942-NEXT: v_mov_b32_e32 v13, v11
+; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -8655,14 +8391,14 @@ define void @v_shuffle_v4p0_v4p0__7_7_2_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
-; GFX900-NEXT: v_mov_b32_e32 v10, v12
-; GFX900-NEXT: v_mov_b32_e32 v11, v13
-; GFX900-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v14, v12
+; GFX900-NEXT: v_mov_b32_e32 v15, v13
+; GFX900-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -8672,14 +8408,14 @@ define void @v_shuffle_v4p0_v4p0__7_7_2_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
-; GFX90A-NEXT: v_mov_b32_e32 v10, v12
-; GFX90A-NEXT: v_mov_b32_e32 v11, v13
-; GFX90A-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v14, v12
+; GFX90A-NEXT: v_mov_b32_e32 v15, v13
+; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -8689,14 +8425,14 @@ define void @v_shuffle_v4p0_v4p0__7_7_2_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v10, v12
-; GFX942-NEXT: v_mov_b32_e32 v11, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v14, v12
+; GFX942-NEXT: v_mov_b32_e32 v15, v13
+; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -8713,60 +8449,48 @@ define void @v_shuffle_v4p0_v4p0__7_7_3_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, v6
-; GFX900-NEXT: v_mov_b32_e32 v9, v7
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v10, v0
-; GFX900-NEXT: v_mov_b32_e32 v11, v1
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
+; GFX900-NEXT: v_mov_b32_e32 v16, v14
+; GFX900-NEXT: v_mov_b32_e32 v17, v15
+; GFX900-NEXT: global_store_dwordx4 v0, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v0, v[14:17], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v4p0__7_7_3_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v18, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v16, v14
+; GFX90A-NEXT: v_mov_b32_e32 v17, v15
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: v_mov_b32_e32 v2, v8
-; GFX90A-NEXT: v_mov_b32_e32 v3, v9
-; GFX90A-NEXT: v_mov_b32_e32 v12, v14
-; GFX90A-NEXT: v_mov_b32_e32 v13, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v18, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v18, v[14:17], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_3_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v18, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: v_mov_b32_e32 v2, v8
-; GFX942-NEXT: v_mov_b32_e32 v3, v9
-; GFX942-NEXT: v_mov_b32_e32 v12, v14
-; GFX942-NEXT: v_mov_b32_e32 v13, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v18, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v16, v14
+; GFX942-NEXT: v_mov_b32_e32 v17, v15
+; GFX942-NEXT: global_store_dwordx4 v18, v[14:17], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -8783,14 +8507,13 @@ define void @v_shuffle_v4p0_v4p0__7_7_5_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: v_mov_b32_e32 v4, v0
; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -8800,14 +8523,13 @@ define void @v_shuffle_v4p0_v4p0__7_7_5_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: v_mov_b32_e32 v4, v0
; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -8817,14 +8539,13 @@ define void @v_shuffle_v4p0_v4p0__7_7_5_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: v_mov_b32_e32 v4, v0
; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -8838,57 +8559,49 @@ define void @v_shuffle_v4p0_v4p0__7_7_6_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v4p0__7_7_6_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, v6
+; GFX900-NEXT: v_mov_b32_e32 v1, v7
+; GFX900-NEXT: v_mov_b32_e32 v10, v8
+; GFX900-NEXT: v_mov_b32_e32 v11, v9
+; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v4p0__7_7_6_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v6
+; GFX90A-NEXT: v_mov_b32_e32 v1, v7
+; GFX90A-NEXT: v_mov_b32_e32 v10, v8
+; GFX90A-NEXT: v_mov_b32_e32 v11, v9
+; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_6_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v6
+; GFX942-NEXT: v_mov_b32_e32 v1, v7
+; GFX942-NEXT: v_mov_b32_e32 v10, v8
+; GFX942-NEXT: v_mov_b32_e32 v11, v9
+; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -9150,15 +8863,15 @@ define void @v_shuffle_v4p0_v4p0__3_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v12, v10
+; GFX900-NEXT: v_mov_b32_e32 v13, v11
+; GFX900-NEXT: v_mov_b32_e32 v8, v10
+; GFX900-NEXT: v_mov_b32_e32 v9, v11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, v10
-; GFX900-NEXT: v_mov_b32_e32 v13, v11
-; GFX900-NEXT: v_mov_b32_e32 v8, v6
-; GFX900-NEXT: v_mov_b32_e32 v9, v7
; GFX900-NEXT: global_store_dwordx4 v16, v[10:13], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -9169,15 +8882,15 @@ define void @v_shuffle_v4p0_v4p0__3_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v16, 0
+; GFX90A-NEXT: v_mov_b32_e32 v12, v10
+; GFX90A-NEXT: v_mov_b32_e32 v13, v11
+; GFX90A-NEXT: v_mov_b32_e32 v8, v10
+; GFX90A-NEXT: v_mov_b32_e32 v9, v11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, v10
-; GFX90A-NEXT: v_mov_b32_e32 v13, v11
-; GFX90A-NEXT: v_mov_b32_e32 v8, v6
-; GFX90A-NEXT: v_mov_b32_e32 v9, v7
; GFX90A-NEXT: global_store_dwordx4 v16, v[10:13], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -9188,15 +8901,15 @@ define void @v_shuffle_v4p0_v4p0__3_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, v10
+; GFX942-NEXT: v_mov_b32_e32 v13, v11
+; GFX942-NEXT: v_mov_b32_e32 v8, v10
+; GFX942-NEXT: v_mov_b32_e32 v9, v11
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, v10
-; GFX942-NEXT: v_mov_b32_e32 v13, v11
-; GFX942-NEXT: v_mov_b32_e32 v8, v6
-; GFX942-NEXT: v_mov_b32_e32 v9, v7
; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -9366,13 +9079,13 @@ define void @v_shuffle_v4p0_v4p0__7_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: v_mov_b32_e32 v4, v2
; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v2
+; GFX900-NEXT: v_mov_b32_e32 v9, v3
+; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -9382,13 +9095,13 @@ define void @v_shuffle_v4p0_v4p0__7_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: v_mov_b32_e32 v4, v2
; GFX90A-NEXT: v_mov_b32_e32 v5, v3
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v2
+; GFX90A-NEXT: v_mov_b32_e32 v9, v3
+; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -9398,13 +9111,13 @@ define void @v_shuffle_v4p0_v4p0__7_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: v_mov_b32_e32 v4, v2
; GFX942-NEXT: v_mov_b32_e32 v5, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v2
+; GFX942-NEXT: v_mov_b32_e32 v9, v3
+; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -9424,10 +9137,8 @@ define void @v_shuffle_v4p0_v4p0__7_u_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: v_mov_b32_e32 v4, v2
; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -9440,10 +9151,8 @@ define void @v_shuffle_v4p0_v4p0__7_u_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v4, v2
; GFX90A-NEXT: v_mov_b32_e32 v5, v3
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -9456,10 +9165,8 @@ define void @v_shuffle_v4p0_v4p0__7_u_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v4, v2
; GFX942-NEXT: v_mov_b32_e32 v5, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -9476,18 +9183,14 @@ define void @v_shuffle_v4p0_v4p0__7_0_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: v_mov_b32_e32 v4, v2
; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v8, v6
-; GFX900-NEXT: v_mov_b32_e32 v9, v7
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, v0
-; GFX900-NEXT: v_mov_b32_e32 v11, v1
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v16, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -9497,18 +9200,14 @@ define void @v_shuffle_v4p0_v4p0__7_0_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:9]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v5
-; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v2, v8
-; GFX90A-NEXT: v_mov_b32_e32 v3, v9
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v16, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -9518,19 +9217,14 @@ define void @v_shuffle_v4p0_v4p0__7_0_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:9]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v5
-; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v8
-; GFX942-NEXT: v_mov_b32_e32 v3, v9
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -9545,18 +9239,16 @@ define void @v_shuffle_v4p0_v4p0__7_1_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v8, v6
-; GFX900-NEXT: v_mov_b32_e32 v9, v7
-; GFX900-NEXT: v_mov_b32_e32 v0, v10
-; GFX900-NEXT: v_mov_b32_e32 v1, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -9564,18 +9256,16 @@ define void @v_shuffle_v4p0_v4p0__7_1_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v6
-; GFX90A-NEXT: v_mov_b32_e32 v9, v7
-; GFX90A-NEXT: v_mov_b32_e32 v0, v10
-; GFX90A-NEXT: v_mov_b32_e32 v1, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -9583,19 +9273,17 @@ define void @v_shuffle_v4p0_v4p0__7_1_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v6
-; GFX942-NEXT: v_mov_b32_e32 v9, v7
-; GFX942-NEXT: v_mov_b32_e32 v0, v10
-; GFX942-NEXT: v_mov_b32_e32 v1, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -9610,18 +9298,16 @@ define void @v_shuffle_v4p0_v4p0__7_2_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
-; GFX900-NEXT: v_mov_b32_e32 v10, v8
-; GFX900-NEXT: v_mov_b32_e32 v11, v9
-; GFX900-NEXT: v_mov_b32_e32 v2, v12
-; GFX900-NEXT: v_mov_b32_e32 v3, v13
-; GFX900-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -9629,18 +9315,16 @@ define void @v_shuffle_v4p0_v4p0__7_2_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
-; GFX90A-NEXT: v_mov_b32_e32 v10, v8
-; GFX90A-NEXT: v_mov_b32_e32 v11, v9
-; GFX90A-NEXT: v_mov_b32_e32 v2, v12
-; GFX90A-NEXT: v_mov_b32_e32 v3, v13
-; GFX90A-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -9648,19 +9332,17 @@ define void @v_shuffle_v4p0_v4p0__7_2_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v8
-; GFX942-NEXT: v_mov_b32_e32 v11, v9
-; GFX942-NEXT: v_mov_b32_e32 v2, v12
-; GFX942-NEXT: v_mov_b32_e32 v3, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -9675,18 +9357,16 @@ define void @v_shuffle_v4p0_v4p0__7_3_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
-; GFX900-NEXT: v_mov_b32_e32 v12, v10
-; GFX900-NEXT: v_mov_b32_e32 v13, v11
-; GFX900-NEXT: v_mov_b32_e32 v4, v14
-; GFX900-NEXT: v_mov_b32_e32 v5, v15
-; GFX900-NEXT: global_store_dwordx4 v16, v[10:13], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -9694,18 +9374,16 @@ define void @v_shuffle_v4p0_v4p0__7_3_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v12, v10
-; GFX90A-NEXT: v_mov_b32_e32 v13, v11
-; GFX90A-NEXT: v_mov_b32_e32 v4, v14
-; GFX90A-NEXT: v_mov_b32_e32 v5, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[10:13], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -9713,20 +9391,19 @@ define void @v_shuffle_v4p0_v4p0__7_3_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v10
-; GFX942-NEXT: v_mov_b32_e32 v13, v11
-; GFX942-NEXT: v_mov_b32_e32 v4, v14
-; GFX942-NEXT: v_mov_b32_e32 v5, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
-; GFX942-NEXT: s_waitcnt vmcnt(0)
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
+; GFX942-NEXT: s_waitcnt vmcnt(0)
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
%vec1 = call <4 x ptr> asm "; def $0", "=v"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 3, i32 5, i32 5>
@@ -9741,16 +9418,13 @@ define void @v_shuffle_v4p0_v4p0__7_4_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: v_mov_b32_e32 v4, v2
; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v0
+; GFX900-NEXT: v_mov_b32_e32 v9, v1
+; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -9760,16 +9434,13 @@ define void @v_shuffle_v4p0_v4p0__7_4_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: v_mov_b32_e32 v4, v2
; GFX90A-NEXT: v_mov_b32_e32 v5, v3
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v0
+; GFX90A-NEXT: v_mov_b32_e32 v9, v1
+; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -9779,16 +9450,13 @@ define void @v_shuffle_v4p0_v4p0__7_4_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: v_mov_b32_e32 v4, v2
; GFX942-NEXT: v_mov_b32_e32 v5, v3
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v0
+; GFX942-NEXT: v_mov_b32_e32 v9, v1
+; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -9805,14 +9473,13 @@ define void @v_shuffle_v4p0_v4p0__7_6_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v2
; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v4
+; GFX900-NEXT: v_mov_b32_e32 v9, v5
+; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -9822,14 +9489,13 @@ define void @v_shuffle_v4p0_v4p0__7_6_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v2
; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v4
+; GFX90A-NEXT: v_mov_b32_e32 v9, v5
+; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -9839,14 +9505,13 @@ define void @v_shuffle_v4p0_v4p0__7_6_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v2
; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v4
+; GFX942-NEXT: v_mov_b32_e32 v9, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -9863,14 +9528,13 @@ define void @v_shuffle_v4p0_v4p0__7_7_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: v_mov_b32_e32 v4, v2
; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -9880,14 +9544,13 @@ define void @v_shuffle_v4p0_v4p0__7_7_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: v_mov_b32_e32 v4, v2
; GFX90A-NEXT: v_mov_b32_e32 v5, v3
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -9897,14 +9560,13 @@ define void @v_shuffle_v4p0_v4p0__7_7_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: v_mov_b32_e32 v4, v2
; GFX942-NEXT: v_mov_b32_e32 v5, v3
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -9918,42 +9580,42 @@ define void @v_shuffle_v4p0_v4p0__7_7_u_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v4p0__7_7_u_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v4p0__7_7_u_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_u_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -9968,18 +9630,19 @@ define void @v_shuffle_v4p0_v4p0__7_7_0_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:9]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v6, v8
-; GFX900-NEXT: v_mov_b32_e32 v7, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v10, v2
+; GFX900-NEXT: v_mov_b32_e32 v11, v3
+; GFX900-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -9987,18 +9650,19 @@ define void @v_shuffle_v4p0_v4p0__7_7_0_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:9]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v6, v8
-; GFX90A-NEXT: v_mov_b32_e32 v7, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v10, v2
+; GFX90A-NEXT: v_mov_b32_e32 v11, v3
+; GFX90A-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -10006,19 +9670,20 @@ define void @v_shuffle_v4p0_v4p0__7_7_0_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:9]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v6, v8
-; GFX942-NEXT: v_mov_b32_e32 v7, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v10, v2
+; GFX942-NEXT: v_mov_b32_e32 v11, v3
+; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -10033,18 +9698,19 @@ define void @v_shuffle_v4p0_v4p0__7_7_1_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: v_mov_b32_e32 v8, v10
-; GFX900-NEXT: v_mov_b32_e32 v9, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v10, v2
+; GFX900-NEXT: v_mov_b32_e32 v11, v3
+; GFX900-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -10052,18 +9718,19 @@ define void @v_shuffle_v4p0_v4p0__7_7_1_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: v_mov_b32_e32 v8, v10
-; GFX90A-NEXT: v_mov_b32_e32 v9, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v10, v2
+; GFX90A-NEXT: v_mov_b32_e32 v11, v3
+; GFX90A-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -10071,19 +9738,20 @@ define void @v_shuffle_v4p0_v4p0__7_7_1_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: v_mov_b32_e32 v8, v10
-; GFX942-NEXT: v_mov_b32_e32 v9, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v10, v2
+; GFX942-NEXT: v_mov_b32_e32 v11, v3
+; GFX942-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -10098,18 +9766,19 @@ define void @v_shuffle_v4p0_v4p0__7_7_2_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v8
-; GFX900-NEXT: v_mov_b32_e32 v7, v9
-; GFX900-NEXT: v_mov_b32_e32 v10, v12
-; GFX900-NEXT: v_mov_b32_e32 v11, v13
-; GFX900-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v10, v2
+; GFX900-NEXT: v_mov_b32_e32 v11, v3
+; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -10117,18 +9786,19 @@ define void @v_shuffle_v4p0_v4p0__7_7_2_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v8
-; GFX90A-NEXT: v_mov_b32_e32 v7, v9
-; GFX90A-NEXT: v_mov_b32_e32 v10, v12
-; GFX90A-NEXT: v_mov_b32_e32 v11, v13
-; GFX90A-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v10, v2
+; GFX90A-NEXT: v_mov_b32_e32 v11, v3
+; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -10136,19 +9806,20 @@ define void @v_shuffle_v4p0_v4p0__7_7_2_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v8
-; GFX942-NEXT: v_mov_b32_e32 v7, v9
-; GFX942-NEXT: v_mov_b32_e32 v10, v12
-; GFX942-NEXT: v_mov_b32_e32 v11, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v10, v2
+; GFX942-NEXT: v_mov_b32_e32 v11, v3
+; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -10163,18 +9834,18 @@ define void @v_shuffle_v4p0_v4p0__7_7_3_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, v6
-; GFX900-NEXT: v_mov_b32_e32 v9, v7
-; GFX900-NEXT: v_mov_b32_e32 v12, v14
-; GFX900-NEXT: v_mov_b32_e32 v13, v15
-; GFX900-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v10
+; GFX900-NEXT: v_mov_b32_e32 v9, v11
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
+; GFX900-NEXT: v_mov_b32_e32 v16, v14
+; GFX900-NEXT: v_mov_b32_e32 v17, v15
+; GFX900-NEXT: global_store_dwordx4 v0, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v0, v[14:17], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -10184,16 +9855,16 @@ define void @v_shuffle_v4p0_v4p0__7_7_3_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
+; GFX90A-NEXT: v_mov_b32_e32 v18, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v10
+; GFX90A-NEXT: v_mov_b32_e32 v9, v11
+; GFX90A-NEXT: v_mov_b32_e32 v16, v14
+; GFX90A-NEXT: v_mov_b32_e32 v17, v15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, v6
-; GFX90A-NEXT: v_mov_b32_e32 v9, v7
-; GFX90A-NEXT: v_mov_b32_e32 v12, v14
-; GFX90A-NEXT: v_mov_b32_e32 v13, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v18, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v18, v[14:17], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -10203,16 +9874,16 @@ define void @v_shuffle_v4p0_v4p0__7_7_3_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: v_mov_b32_e32 v18, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v10
+; GFX942-NEXT: v_mov_b32_e32 v9, v11
+; GFX942-NEXT: v_mov_b32_e32 v16, v14
+; GFX942-NEXT: v_mov_b32_e32 v17, v15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, v14
-; GFX942-NEXT: v_mov_b32_e32 v8, v6
-; GFX942-NEXT: v_mov_b32_e32 v9, v7
-; GFX942-NEXT: v_mov_b32_e32 v13, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v18, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v18, v[14:17], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -10226,42 +9897,42 @@ define void @v_shuffle_v4p0_v4p0__7_7_4_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v4p0__7_7_4_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v4p0__7_7_4_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_4_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -10278,13 +9949,13 @@ define void @v_shuffle_v4p0_v4p0__7_7_6_5(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: v_mov_b32_e32 v0, v4
; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -10294,13 +9965,13 @@ define void @v_shuffle_v4p0_v4p0__7_7_6_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: v_mov_b32_e32 v0, v4
; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -10310,13 +9981,13 @@ define void @v_shuffle_v4p0_v4p0__7_7_6_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: v_mov_b32_e32 v0, v4
; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -10578,15 +10249,15 @@ define void @v_shuffle_v4p0_v4p0__3_6_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v16, 0
+; GFX900-NEXT: v_mov_b32_e32 v14, v12
+; GFX900-NEXT: v_mov_b32_e32 v15, v13
+; GFX900-NEXT: v_mov_b32_e32 v8, v12
+; GFX900-NEXT: v_mov_b32_e32 v9, v13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, v12
-; GFX900-NEXT: v_mov_b32_e32 v15, v13
-; GFX900-NEXT: v_mov_b32_e32 v10, v6
-; GFX900-NEXT: v_mov_b32_e32 v11, v7
; GFX900-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[10:13], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -10597,15 +10268,15 @@ define void @v_shuffle_v4p0_v4p0__3_6_6_6(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v16, 0
+; GFX90A-NEXT: v_mov_b32_e32 v14, v12
+; GFX90A-NEXT: v_mov_b32_e32 v15, v13
+; GFX90A-NEXT: v_mov_b32_e32 v8, v12
+; GFX90A-NEXT: v_mov_b32_e32 v9, v13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, v12
-; GFX90A-NEXT: v_mov_b32_e32 v15, v13
-; GFX90A-NEXT: v_mov_b32_e32 v10, v6
-; GFX90A-NEXT: v_mov_b32_e32 v11, v7
; GFX90A-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[10:13], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -10616,15 +10287,15 @@ define void @v_shuffle_v4p0_v4p0__3_6_6_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: v_mov_b32_e32 v14, v12
+; GFX942-NEXT: v_mov_b32_e32 v15, v13
+; GFX942-NEXT: v_mov_b32_e32 v8, v12
+; GFX942-NEXT: v_mov_b32_e32 v9, v13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, v12
-; GFX942-NEXT: v_mov_b32_e32 v15, v13
-; GFX942-NEXT: v_mov_b32_e32 v10, v6
-; GFX942-NEXT: v_mov_b32_e32 v11, v7
; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -10794,14 +10465,13 @@ define void @v_shuffle_v4p0_v4p0__7_6_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v4
; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v4
+; GFX900-NEXT: v_mov_b32_e32 v9, v5
+; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -10811,14 +10481,13 @@ define void @v_shuffle_v4p0_v4p0__7_6_6_6(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v4
; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v4
+; GFX90A-NEXT: v_mov_b32_e32 v9, v5
+; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -10828,14 +10497,13 @@ define void @v_shuffle_v4p0_v4p0__7_6_6_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v4
; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v4
+; GFX942-NEXT: v_mov_b32_e32 v9, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -10855,10 +10523,8 @@ define void @v_shuffle_v4p0_v4p0__7_u_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v4
; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -10871,10 +10537,8 @@ define void @v_shuffle_v4p0_v4p0__7_u_6_6(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v4
; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -10887,10 +10551,8 @@ define void @v_shuffle_v4p0_v4p0__7_u_6_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v4
; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -10907,18 +10569,14 @@ define void @v_shuffle_v4p0_v4p0__7_0_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v4
; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v8, v6
-; GFX900-NEXT: v_mov_b32_e32 v9, v7
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, v0
-; GFX900-NEXT: v_mov_b32_e32 v11, v1
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v16, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -10928,18 +10586,14 @@ define void @v_shuffle_v4p0_v4p0__7_0_6_6(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v4
+; GFX90A-NEXT: v_mov_b32_e32 v3, v5
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:9]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v2, v8
-; GFX90A-NEXT: v_mov_b32_e32 v3, v9
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v16, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -10949,19 +10603,14 @@ define void @v_shuffle_v4p0_v4p0__7_0_6_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v4
+; GFX942-NEXT: v_mov_b32_e32 v3, v5
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:9]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v8
-; GFX942-NEXT: v_mov_b32_e32 v3, v9
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -10976,18 +10625,16 @@ define void @v_shuffle_v4p0_v4p0__7_1_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v8
-; GFX900-NEXT: v_mov_b32_e32 v7, v9
-; GFX900-NEXT: v_mov_b32_e32 v0, v10
-; GFX900-NEXT: v_mov_b32_e32 v1, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v4
+; GFX900-NEXT: v_mov_b32_e32 v3, v5
+; GFX900-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -10995,18 +10642,16 @@ define void @v_shuffle_v4p0_v4p0__7_1_6_6(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v8
-; GFX90A-NEXT: v_mov_b32_e32 v7, v9
-; GFX90A-NEXT: v_mov_b32_e32 v0, v10
-; GFX90A-NEXT: v_mov_b32_e32 v1, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v4
+; GFX90A-NEXT: v_mov_b32_e32 v3, v5
+; GFX90A-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -11014,19 +10659,17 @@ define void @v_shuffle_v4p0_v4p0__7_1_6_6(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v8
-; GFX942-NEXT: v_mov_b32_e32 v7, v9
-; GFX942-NEXT: v_mov_b32_e32 v0, v10
-; GFX942-NEXT: v_mov_b32_e32 v1, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v4
+; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -11041,18 +10684,16 @@ define void @v_shuffle_v4p0_v4p0__7_2_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
-; GFX900-NEXT: v_mov_b32_e32 v8, v10
-; GFX900-NEXT: v_mov_b32_e32 v9, v11
-; GFX900-NEXT: v_mov_b32_e32 v2, v12
-; GFX900-NEXT: v_mov_b32_e32 v3, v13
-; GFX900-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v4
+; GFX900-NEXT: v_mov_b32_e32 v3, v5
+; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -11060,18 +10701,16 @@ define void @v_shuffle_v4p0_v4p0__7_2_6_6(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v10
-; GFX90A-NEXT: v_mov_b32_e32 v9, v11
-; GFX90A-NEXT: v_mov_b32_e32 v2, v12
-; GFX90A-NEXT: v_mov_b32_e32 v3, v13
-; GFX90A-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v4
+; GFX90A-NEXT: v_mov_b32_e32 v3, v5
+; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -11079,19 +10718,17 @@ define void @v_shuffle_v4p0_v4p0__7_2_6_6(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v10
-; GFX942-NEXT: v_mov_b32_e32 v9, v11
-; GFX942-NEXT: v_mov_b32_e32 v2, v12
-; GFX942-NEXT: v_mov_b32_e32 v3, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v4
+; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -11106,18 +10743,16 @@ define void @v_shuffle_v4p0_v4p0__7_3_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[2:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
-; GFX900-NEXT: v_mov_b32_e32 v10, v12
-; GFX900-NEXT: v_mov_b32_e32 v11, v13
-; GFX900-NEXT: v_mov_b32_e32 v4, v14
-; GFX900-NEXT: v_mov_b32_e32 v5, v15
-; GFX900-NEXT: global_store_dwordx4 v16, v[10:13], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v4
+; GFX900-NEXT: v_mov_b32_e32 v3, v5
+; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -11125,18 +10760,16 @@ define void @v_shuffle_v4p0_v4p0__7_3_6_6(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[2:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v10, v12
-; GFX90A-NEXT: v_mov_b32_e32 v11, v13
-; GFX90A-NEXT: v_mov_b32_e32 v4, v14
-; GFX90A-NEXT: v_mov_b32_e32 v5, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[10:13], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v4
+; GFX90A-NEXT: v_mov_b32_e32 v3, v5
+; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -11144,18 +10777,17 @@ define void @v_shuffle_v4p0_v4p0__7_3_6_6(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v12
-; GFX942-NEXT: v_mov_b32_e32 v11, v13
-; GFX942-NEXT: v_mov_b32_e32 v4, v14
-; GFX942-NEXT: v_mov_b32_e32 v5, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v4
+; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -11172,16 +10804,13 @@ define void @v_shuffle_v4p0_v4p0__7_4_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v4
; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v0
+; GFX900-NEXT: v_mov_b32_e32 v9, v1
+; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -11191,16 +10820,13 @@ define void @v_shuffle_v4p0_v4p0__7_4_6_6(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v4
; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v0
+; GFX90A-NEXT: v_mov_b32_e32 v9, v1
+; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -11210,16 +10836,13 @@ define void @v_shuffle_v4p0_v4p0__7_4_6_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v4
; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v0
+; GFX942-NEXT: v_mov_b32_e32 v9, v1
+; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -11233,54 +10856,56 @@ define void @v_shuffle_v4p0_v4p0__7_5_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v4p0__7_5_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: v_mov_b32_e32 v8, v4
; GFX900-NEXT: v_mov_b32_e32 v9, v5
; GFX900-NEXT: v_mov_b32_e32 v10, v4
; GFX900-NEXT: v_mov_b32_e32 v11, v5
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v2
+; GFX900-NEXT: v_mov_b32_e32 v9, v3
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v4p0__7_5_6_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v10, v4
-; GFX90A-NEXT: v_mov_b32_e32 v11, v5
; GFX90A-NEXT: v_mov_b32_e32 v12, v4
; GFX90A-NEXT: v_mov_b32_e32 v13, v5
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[10:13], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v14, v4
+; GFX90A-NEXT: v_mov_b32_e32 v15, v5
+; GFX90A-NEXT: v_mov_b32_e32 v8, v2
+; GFX90A-NEXT: v_mov_b32_e32 v9, v3
+; GFX90A-NEXT: global_store_dwordx4 v10, v[12:15], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_5_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v4
-; GFX942-NEXT: v_mov_b32_e32 v11, v5
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_mov_b32_e32 v12, v4
; GFX942-NEXT: v_mov_b32_e32 v13, v5
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[10:13], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v14, v4
+; GFX942-NEXT: v_mov_b32_e32 v15, v5
+; GFX942-NEXT: v_mov_b32_e32 v8, v2
+; GFX942-NEXT: v_mov_b32_e32 v9, v3
+; GFX942-NEXT: global_store_dwordx4 v10, v[12:15], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -11297,14 +10922,13 @@ define void @v_shuffle_v4p0_v4p0__7_7_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v4
; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -11314,14 +10938,13 @@ define void @v_shuffle_v4p0_v4p0__7_7_6_6(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v4
; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -11331,14 +10954,13 @@ define void @v_shuffle_v4p0_v4p0__7_7_6_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v4
; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -11352,45 +10974,42 @@ define void @v_shuffle_v4p0_v4p0__7_7_u_6(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v4p0__7_7_u_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v4p0__7_7_u_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_u_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -11405,18 +11024,19 @@ define void @v_shuffle_v4p0_v4p0__7_7_0_6(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:9]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: v_mov_b32_e32 v6, v8
-; GFX900-NEXT: v_mov_b32_e32 v7, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v10, v4
+; GFX900-NEXT: v_mov_b32_e32 v11, v5
+; GFX900-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -11424,18 +11044,19 @@ define void @v_shuffle_v4p0_v4p0__7_7_0_6(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:9]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v6, v8
-; GFX90A-NEXT: v_mov_b32_e32 v7, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v10, v4
+; GFX90A-NEXT: v_mov_b32_e32 v11, v5
+; GFX90A-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -11443,19 +11064,20 @@ define void @v_shuffle_v4p0_v4p0__7_7_0_6(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:9]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v6, v8
-; GFX942-NEXT: v_mov_b32_e32 v7, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v10, v4
+; GFX942-NEXT: v_mov_b32_e32 v11, v5
+; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -11470,18 +11092,19 @@ define void @v_shuffle_v4p0_v4p0__7_7_1_6(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v8
-; GFX900-NEXT: v_mov_b32_e32 v5, v9
-; GFX900-NEXT: v_mov_b32_e32 v8, v10
-; GFX900-NEXT: v_mov_b32_e32 v9, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v10, v4
+; GFX900-NEXT: v_mov_b32_e32 v11, v5
+; GFX900-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -11489,18 +11112,19 @@ define void @v_shuffle_v4p0_v4p0__7_7_1_6(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v8
-; GFX90A-NEXT: v_mov_b32_e32 v5, v9
-; GFX90A-NEXT: v_mov_b32_e32 v8, v10
-; GFX90A-NEXT: v_mov_b32_e32 v9, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v10, v4
+; GFX90A-NEXT: v_mov_b32_e32 v11, v5
+; GFX90A-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -11508,19 +11132,20 @@ define void @v_shuffle_v4p0_v4p0__7_7_1_6(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v8
-; GFX942-NEXT: v_mov_b32_e32 v5, v9
-; GFX942-NEXT: v_mov_b32_e32 v8, v10
-; GFX942-NEXT: v_mov_b32_e32 v9, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v10, v4
+; GFX942-NEXT: v_mov_b32_e32 v11, v5
+; GFX942-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -11535,18 +11160,19 @@ define void @v_shuffle_v4p0_v4p0__7_7_2_6(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v10
-; GFX900-NEXT: v_mov_b32_e32 v7, v11
-; GFX900-NEXT: v_mov_b32_e32 v10, v12
-; GFX900-NEXT: v_mov_b32_e32 v11, v13
-; GFX900-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v10, v4
+; GFX900-NEXT: v_mov_b32_e32 v11, v5
+; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -11554,18 +11180,19 @@ define void @v_shuffle_v4p0_v4p0__7_7_2_6(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v10
-; GFX90A-NEXT: v_mov_b32_e32 v7, v11
-; GFX90A-NEXT: v_mov_b32_e32 v10, v12
-; GFX90A-NEXT: v_mov_b32_e32 v11, v13
-; GFX90A-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v10, v4
+; GFX90A-NEXT: v_mov_b32_e32 v11, v5
+; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -11573,19 +11200,20 @@ define void @v_shuffle_v4p0_v4p0__7_7_2_6(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v10
-; GFX942-NEXT: v_mov_b32_e32 v7, v11
-; GFX942-NEXT: v_mov_b32_e32 v10, v12
-; GFX942-NEXT: v_mov_b32_e32 v11, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v10, v4
+; GFX942-NEXT: v_mov_b32_e32 v11, v5
+; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -11600,19 +11228,18 @@ define void @v_shuffle_v4p0_v4p0__7_7_3_6(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, v6
-; GFX900-NEXT: v_mov_b32_e32 v11, v7
-; GFX900-NEXT: global_store_dwordx4 v16, v[10:13], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v12, v14
-; GFX900-NEXT: v_mov_b32_e32 v13, v15
-; GFX900-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v12
+; GFX900-NEXT: v_mov_b32_e32 v9, v13
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
+; GFX900-NEXT: v_mov_b32_e32 v16, v14
+; GFX900-NEXT: v_mov_b32_e32 v17, v15
+; GFX900-NEXT: global_store_dwordx4 v0, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v0, v[14:17], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -11622,17 +11249,16 @@ define void @v_shuffle_v4p0_v4p0__7_7_3_6(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
+; GFX90A-NEXT: v_mov_b32_e32 v18, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v12
+; GFX90A-NEXT: v_mov_b32_e32 v9, v13
+; GFX90A-NEXT: v_mov_b32_e32 v16, v14
+; GFX90A-NEXT: v_mov_b32_e32 v17, v15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, v6
-; GFX90A-NEXT: v_mov_b32_e32 v11, v7
-; GFX90A-NEXT: global_store_dwordx4 v16, v[10:13], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v12, v14
-; GFX90A-NEXT: v_mov_b32_e32 v13, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v18, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v18, v[14:17], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -11642,18 +11268,16 @@ define void @v_shuffle_v4p0_v4p0__7_7_3_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: v_mov_b32_e32 v18, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v12
+; GFX942-NEXT: v_mov_b32_e32 v9, v13
+; GFX942-NEXT: v_mov_b32_e32 v16, v14
+; GFX942-NEXT: v_mov_b32_e32 v17, v15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v6
-; GFX942-NEXT: v_mov_b32_e32 v11, v7
-; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v12, v14
-; GFX942-NEXT: v_mov_b32_e32 v13, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v18, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v18, v[14:17], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -11670,13 +11294,13 @@ define void @v_shuffle_v4p0_v4p0__7_7_4_6(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v4
; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -11686,13 +11310,13 @@ define void @v_shuffle_v4p0_v4p0__7_7_4_6(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v4
; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -11702,13 +11326,13 @@ define void @v_shuffle_v4p0_v4p0__7_7_4_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v4
; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -11722,45 +11346,42 @@ define void @v_shuffle_v4p0_v4p0__7_7_5_6(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v4p0__7_7_5_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v4p0__7_7_5_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_5_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -11774,42 +11395,43 @@ define void @v_shuffle_v4p0_v4p0__u_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v4p0__u_7_7_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
-; GFX900-NEXT: s_waitcnt vmcnt(0)
-; GFX900-NEXT: s_setpc_b64 s[30:31]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v4p0__u_7_7_7:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v4p0__u_7_7_7:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -11824,18 +11446,18 @@ define void @v_shuffle_v4p0_v4p0__0_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[10:17]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v18, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:9]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v8
-; GFX900-NEXT: v_mov_b32_e32 v7, v9
-; GFX900-NEXT: v_mov_b32_e32 v2, v8
-; GFX900-NEXT: v_mov_b32_e32 v3, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: v_mov_b32_e32 v12, v6
+; GFX900-NEXT: v_mov_b32_e32 v13, v7
+; GFX900-NEXT: global_store_dwordx4 v18, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v18, v[10:13], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -11843,18 +11465,18 @@ define void @v_shuffle_v4p0_v4p0__0_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[10:17]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v18, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:9]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v8
-; GFX90A-NEXT: v_mov_b32_e32 v7, v9
-; GFX90A-NEXT: v_mov_b32_e32 v2, v8
-; GFX90A-NEXT: v_mov_b32_e32 v3, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: v_mov_b32_e32 v12, v6
+; GFX90A-NEXT: v_mov_b32_e32 v13, v7
+; GFX90A-NEXT: global_store_dwordx4 v18, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v18, v[10:13], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -11862,19 +11484,19 @@ define void @v_shuffle_v4p0_v4p0__0_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[10:17]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: v_mov_b32_e32 v18, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:9]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v8
-; GFX942-NEXT: v_mov_b32_e32 v7, v9
-; GFX942-NEXT: v_mov_b32_e32 v2, v8
-; GFX942-NEXT: v_mov_b32_e32 v3, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: v_mov_b32_e32 v12, v6
+; GFX942-NEXT: v_mov_b32_e32 v13, v7
+; GFX942-NEXT: global_store_dwordx4 v18, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v18, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -11889,18 +11511,18 @@ define void @v_shuffle_v4p0_v4p0__1_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v8, v10
-; GFX900-NEXT: v_mov_b32_e32 v9, v11
-; GFX900-NEXT: v_mov_b32_e32 v4, v10
-; GFX900-NEXT: v_mov_b32_e32 v5, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: v_mov_b32_e32 v12, v6
+; GFX900-NEXT: v_mov_b32_e32 v13, v7
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v16, v[10:13], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -11908,18 +11530,18 @@ define void @v_shuffle_v4p0_v4p0__1_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v10
-; GFX90A-NEXT: v_mov_b32_e32 v9, v11
-; GFX90A-NEXT: v_mov_b32_e32 v4, v10
-; GFX90A-NEXT: v_mov_b32_e32 v5, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: v_mov_b32_e32 v12, v6
+; GFX90A-NEXT: v_mov_b32_e32 v13, v7
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v16, v[10:13], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -11927,19 +11549,19 @@ define void @v_shuffle_v4p0_v4p0__1_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v10
-; GFX942-NEXT: v_mov_b32_e32 v9, v11
-; GFX942-NEXT: v_mov_b32_e32 v4, v10
-; GFX942-NEXT: v_mov_b32_e32 v5, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: v_mov_b32_e32 v12, v6
+; GFX942-NEXT: v_mov_b32_e32 v13, v7
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -11954,18 +11576,18 @@ define void @v_shuffle_v4p0_v4p0__2_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v14, 0
-; GFX900-NEXT: v_mov_b32_e32 v10, v12
-; GFX900-NEXT: v_mov_b32_e32 v11, v13
-; GFX900-NEXT: v_mov_b32_e32 v6, v12
-; GFX900-NEXT: v_mov_b32_e32 v7, v13
-; GFX900-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: v_mov_b32_e32 v12, v6
+; GFX900-NEXT: v_mov_b32_e32 v13, v7
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -11973,18 +11595,18 @@ define void @v_shuffle_v4p0_v4p0__2_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v14, 0
-; GFX90A-NEXT: v_mov_b32_e32 v10, v12
-; GFX90A-NEXT: v_mov_b32_e32 v11, v13
-; GFX90A-NEXT: v_mov_b32_e32 v6, v12
-; GFX90A-NEXT: v_mov_b32_e32 v7, v13
-; GFX90A-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17]
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: v_mov_b32_e32 v12, v6
+; GFX90A-NEXT: v_mov_b32_e32 v13, v7
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -11992,19 +11614,19 @@ define void @v_shuffle_v4p0_v4p0__2_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v12
-; GFX942-NEXT: v_mov_b32_e32 v11, v13
-; GFX942-NEXT: v_mov_b32_e32 v6, v12
-; GFX942-NEXT: v_mov_b32_e32 v7, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: v_mov_b32_e32 v12, v6
+; GFX942-NEXT: v_mov_b32_e32 v13, v7
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -12018,20 +11640,18 @@ define void @v_shuffle_v4p0_v4p0__3_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v4p0__3_7_7_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
-; GFX900-NEXT: v_mov_b32_e32 v12, v14
-; GFX900-NEXT: v_mov_b32_e32 v13, v15
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v12, v6
-; GFX900-NEXT: v_mov_b32_e32 v13, v7
-; GFX900-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -12041,17 +11661,16 @@ define void @v_shuffle_v4p0_v4p0__3_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v12, v14
-; GFX90A-NEXT: v_mov_b32_e32 v13, v15
+; GFX90A-NEXT: v_mov_b32_e32 v18, 0
+; GFX90A-NEXT: v_mov_b32_e32 v16, v14
+; GFX90A-NEXT: v_mov_b32_e32 v17, v15
+; GFX90A-NEXT: v_mov_b32_e32 v8, v14
+; GFX90A-NEXT: v_mov_b32_e32 v9, v15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v12, v6
-; GFX90A-NEXT: v_mov_b32_e32 v13, v7
-; GFX90A-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v18, v[14:17], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v18, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -12061,17 +11680,16 @@ define void @v_shuffle_v4p0_v4p0__3_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v14
-; GFX942-NEXT: v_mov_b32_e32 v13, v15
+; GFX942-NEXT: v_mov_b32_e32 v18, 0
+; GFX942-NEXT: v_mov_b32_e32 v16, v14
+; GFX942-NEXT: v_mov_b32_e32 v17, v15
+; GFX942-NEXT: v_mov_b32_e32 v8, v14
+; GFX942-NEXT: v_mov_b32_e32 v9, v15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v12, v6
-; GFX942-NEXT: v_mov_b32_e32 v13, v7
-; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v18, v[14:17], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v18, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -12088,13 +11706,13 @@ define void @v_shuffle_v4p0_v4p0__4_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
; GFX900-NEXT: v_mov_b32_e32 v2, v6
; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -12104,13 +11722,13 @@ define void @v_shuffle_v4p0_v4p0__4_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
; GFX90A-NEXT: v_mov_b32_e32 v2, v6
; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -12120,13 +11738,13 @@ define void @v_shuffle_v4p0_v4p0__4_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
; GFX942-NEXT: v_mov_b32_e32 v2, v6
; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -12143,11 +11761,13 @@ define void @v_shuffle_v4p0_v4p0__5_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
; GFX900-NEXT: v_mov_b32_e32 v4, v6
; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -12157,11 +11777,13 @@ define void @v_shuffle_v4p0_v4p0__5_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
; GFX90A-NEXT: v_mov_b32_e32 v4, v6
; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -12171,11 +11793,13 @@ define void @v_shuffle_v4p0_v4p0__5_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
; GFX942-NEXT: v_mov_b32_e32 v4, v6
; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -12189,48 +11813,43 @@ define void @v_shuffle_v4p0_v4p0__6_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v4p0__6_7_7_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v4p0__6_7_7_7:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v4p0__6_7_7_7:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -12244,48 +11863,43 @@ define void @v_shuffle_v4p0_v4p0__7_u_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v4p0__7_u_7_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v4p0__7_u_7_7:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_u_7_7:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -12299,64 +11913,61 @@ define void @v_shuffle_v4p0_v4p0__7_0_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v4p0__7_0_7_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v18, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v12, v[4:7], s[16:17] offset:16
; GFX900-NEXT: v_mov_b32_e32 v8, v6
; GFX900-NEXT: v_mov_b32_e32 v9, v7
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[10:17]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, v0
-; GFX900-NEXT: v_mov_b32_e32 v11, v1
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v18, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v10
+; GFX900-NEXT: v_mov_b32_e32 v9, v11
+; GFX900-NEXT: global_store_dwordx4 v18, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v4p0__7_0_7_7:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v18, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:9]
+; GFX90A-NEXT: ; def v[10:17]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v8
-; GFX90A-NEXT: v_mov_b32_e32 v7, v9
-; GFX90A-NEXT: v_mov_b32_e32 v2, v8
-; GFX90A-NEXT: v_mov_b32_e32 v3, v9
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v18, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v10
+; GFX90A-NEXT: v_mov_b32_e32 v9, v11
+; GFX90A-NEXT: global_store_dwordx4 v18, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_0_7_7:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v18, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:9]
+; GFX942-NEXT: ; def v[10:17]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v8
-; GFX942-NEXT: v_mov_b32_e32 v7, v9
-; GFX942-NEXT: v_mov_b32_e32 v2, v8
-; GFX942-NEXT: v_mov_b32_e32 v3, v9
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v18, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v10
+; GFX942-NEXT: v_mov_b32_e32 v9, v11
+; GFX942-NEXT: global_store_dwordx4 v18, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -12371,18 +11982,19 @@ define void @v_shuffle_v4p0_v4p0__7_1_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
; GFX900-NEXT: v_mov_b32_e32 v8, v10
; GFX900-NEXT: v_mov_b32_e32 v9, v11
-; GFX900-NEXT: v_mov_b32_e32 v0, v10
-; GFX900-NEXT: v_mov_b32_e32 v1, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -12390,18 +12002,19 @@ define void @v_shuffle_v4p0_v4p0__7_1_7_7(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
; GFX90A-NEXT: v_mov_b32_e32 v8, v10
; GFX90A-NEXT: v_mov_b32_e32 v9, v11
-; GFX90A-NEXT: v_mov_b32_e32 v0, v10
-; GFX90A-NEXT: v_mov_b32_e32 v1, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -12409,19 +12022,20 @@ define void @v_shuffle_v4p0_v4p0__7_1_7_7(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_mov_b32_e32 v8, v10
; GFX942-NEXT: v_mov_b32_e32 v9, v11
-; GFX942-NEXT: v_mov_b32_e32 v0, v10
-; GFX942-NEXT: v_mov_b32_e32 v1, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -12436,18 +12050,19 @@ define void @v_shuffle_v4p0_v4p0__7_2_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v14, 0
-; GFX900-NEXT: v_mov_b32_e32 v10, v12
-; GFX900-NEXT: v_mov_b32_e32 v11, v13
-; GFX900-NEXT: v_mov_b32_e32 v2, v12
-; GFX900-NEXT: v_mov_b32_e32 v3, v13
-; GFX900-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v10
+; GFX900-NEXT: v_mov_b32_e32 v9, v11
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -12455,18 +12070,19 @@ define void @v_shuffle_v4p0_v4p0__7_2_7_7(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v14, 0
-; GFX90A-NEXT: v_mov_b32_e32 v10, v12
-; GFX90A-NEXT: v_mov_b32_e32 v11, v13
-; GFX90A-NEXT: v_mov_b32_e32 v2, v12
-; GFX90A-NEXT: v_mov_b32_e32 v3, v13
-; GFX90A-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v14, v[2:5], s[16:17]
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v10
+; GFX90A-NEXT: v_mov_b32_e32 v9, v11
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -12474,19 +12090,20 @@ define void @v_shuffle_v4p0_v4p0__7_2_7_7(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v12
-; GFX942-NEXT: v_mov_b32_e32 v11, v13
-; GFX942-NEXT: v_mov_b32_e32 v2, v12
-; GFX942-NEXT: v_mov_b32_e32 v3, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v10
+; GFX942-NEXT: v_mov_b32_e32 v9, v11
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -12501,18 +12118,19 @@ define void @v_shuffle_v4p0_v4p0__7_3_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
-; GFX900-NEXT: v_mov_b32_e32 v12, v14
-; GFX900-NEXT: v_mov_b32_e32 v13, v15
-; GFX900-NEXT: v_mov_b32_e32 v4, v14
-; GFX900-NEXT: v_mov_b32_e32 v5, v15
-; GFX900-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v10
+; GFX900-NEXT: v_mov_b32_e32 v9, v11
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -12520,18 +12138,19 @@ define void @v_shuffle_v4p0_v4p0__7_3_7_7(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[8:15]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
-; GFX90A-NEXT: v_mov_b32_e32 v12, v14
-; GFX90A-NEXT: v_mov_b32_e32 v13, v15
-; GFX90A-NEXT: v_mov_b32_e32 v4, v14
-; GFX90A-NEXT: v_mov_b32_e32 v5, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v10
+; GFX90A-NEXT: v_mov_b32_e32 v9, v11
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -12539,18 +12158,20 @@ define void @v_shuffle_v4p0_v4p0__7_3_7_7(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[8:15]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v14
-; GFX942-NEXT: v_mov_b32_e32 v13, v15
-; GFX942-NEXT: v_mov_b32_e32 v4, v14
-; GFX942-NEXT: v_mov_b32_e32 v5, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v10
+; GFX942-NEXT: v_mov_b32_e32 v9, v11
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -12564,54 +12185,52 @@ define void @v_shuffle_v4p0_v4p0__7_4_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v4p0__7_4_7_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v0
+; GFX900-NEXT: v_mov_b32_e32 v9, v1
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v4p0__7_4_7_7:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v0
+; GFX90A-NEXT: v_mov_b32_e32 v9, v1
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_4_7_7:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v0
+; GFX942-NEXT: v_mov_b32_e32 v9, v1
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -12625,48 +12244,52 @@ define void @v_shuffle_v4p0_v4p0__7_5_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v4p0__7_5_7_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v2
+; GFX900-NEXT: v_mov_b32_e32 v9, v3
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v4p0__7_5_7_7:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v2
+; GFX90A-NEXT: v_mov_b32_e32 v9, v3
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_5_7_7:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v2
+; GFX942-NEXT: v_mov_b32_e32 v9, v3
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -12680,48 +12303,52 @@ define void @v_shuffle_v4p0_v4p0__7_6_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v4p0__7_6_7_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v4
+; GFX900-NEXT: v_mov_b32_e32 v9, v5
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v4p0__7_6_7_7:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: v_mov_b32_e32 v2, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v4
+; GFX90A-NEXT: v_mov_b32_e32 v9, v5
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_6_7_7:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v4
+; GFX942-NEXT: v_mov_b32_e32 v9, v5
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -12735,45 +12362,42 @@ define void @v_shuffle_v4p0_v4p0__7_7_u_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v4p0__7_7_u_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v4p0__7_7_u_7:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_u_7:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -12788,18 +12412,19 @@ define void @v_shuffle_v4p0_v4p0__7_7_0_7(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:9]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v8
-; GFX900-NEXT: v_mov_b32_e32 v3, v9
-; GFX900-NEXT: v_mov_b32_e32 v6, v8
-; GFX900-NEXT: v_mov_b32_e32 v7, v9
-; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v10, v6
+; GFX900-NEXT: v_mov_b32_e32 v11, v7
+; GFX900-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -12807,18 +12432,19 @@ define void @v_shuffle_v4p0_v4p0__7_7_0_7(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v16, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:9]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v8
-; GFX90A-NEXT: v_mov_b32_e32 v3, v9
-; GFX90A-NEXT: v_mov_b32_e32 v6, v8
-; GFX90A-NEXT: v_mov_b32_e32 v7, v9
-; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v10, v6
+; GFX90A-NEXT: v_mov_b32_e32 v11, v7
+; GFX90A-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v16, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -12826,19 +12452,20 @@ define void @v_shuffle_v4p0_v4p0__7_7_0_7(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: v_mov_b32_e32 v16, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:9]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v8
-; GFX942-NEXT: v_mov_b32_e32 v3, v9
-; GFX942-NEXT: v_mov_b32_e32 v6, v8
-; GFX942-NEXT: v_mov_b32_e32 v7, v9
-; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v10, v6
+; GFX942-NEXT: v_mov_b32_e32 v11, v7
+; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -12853,18 +12480,19 @@ define void @v_shuffle_v4p0_v4p0__7_7_1_7(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[6:13]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v14, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:11]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v10
-; GFX900-NEXT: v_mov_b32_e32 v5, v11
-; GFX900-NEXT: v_mov_b32_e32 v8, v10
-; GFX900-NEXT: v_mov_b32_e32 v9, v11
-; GFX900-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v10, v6
+; GFX900-NEXT: v_mov_b32_e32 v11, v7
+; GFX900-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -12872,18 +12500,19 @@ define void @v_shuffle_v4p0_v4p0__7_7_1_7(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[6:13]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v14, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:11]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v10
-; GFX90A-NEXT: v_mov_b32_e32 v5, v11
-; GFX90A-NEXT: v_mov_b32_e32 v8, v10
-; GFX90A-NEXT: v_mov_b32_e32 v9, v11
-; GFX90A-NEXT: global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v10, v6
+; GFX90A-NEXT: v_mov_b32_e32 v11, v7
+; GFX90A-NEXT: global_store_dwordx4 v14, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v14, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -12891,19 +12520,20 @@ define void @v_shuffle_v4p0_v4p0__7_7_1_7(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:11]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v10
-; GFX942-NEXT: v_mov_b32_e32 v5, v11
-; GFX942-NEXT: v_mov_b32_e32 v8, v10
-; GFX942-NEXT: v_mov_b32_e32 v9, v11
-; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v10, v6
+; GFX942-NEXT: v_mov_b32_e32 v11, v7
+; GFX942-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v14, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -12918,37 +12548,39 @@ define void @v_shuffle_v4p0_v4p0__7_7_2_7(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v12, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[6:13]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v14, 0
-; GFX900-NEXT: v_mov_b32_e32 v6, v12
-; GFX900-NEXT: v_mov_b32_e32 v7, v13
-; GFX900-NEXT: v_mov_b32_e32 v10, v12
-; GFX900-NEXT: v_mov_b32_e32 v11, v13
-; GFX900-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
-; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, v6
+; GFX900-NEXT: v_mov_b32_e32 v11, v7
+; GFX900-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v4p0__7_7_2_7:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:7]
+; GFX90A-NEXT: ; def v[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v12, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[6:13]
+; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v14, 0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v12
-; GFX90A-NEXT: v_mov_b32_e32 v7, v13
-; GFX90A-NEXT: v_mov_b32_e32 v10, v12
-; GFX90A-NEXT: v_mov_b32_e32 v11, v13
-; GFX90A-NEXT: global_store_dwordx4 v14, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v14, v[10:13], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v10, v6
+; GFX90A-NEXT: v_mov_b32_e32 v11, v7
+; GFX90A-NEXT: global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
+; GFX90A-NEXT: s_nop 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v12, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -12956,19 +12588,20 @@ define void @v_shuffle_v4p0_v4p0__7_7_2_7(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:7]
+; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[6:13]
+; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v12
-; GFX942-NEXT: v_mov_b32_e32 v7, v13
-; GFX942-NEXT: v_mov_b32_e32 v10, v12
-; GFX942-NEXT: v_mov_b32_e32 v11, v13
-; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v10, v6
+; GFX942-NEXT: v_mov_b32_e32 v11, v7
+; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -12983,19 +12616,18 @@ define void @v_shuffle_v4p0_v4p0__7_7_3_7(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[8:15]
+; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v16, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:7]
+; GFX900-NEXT: ; def v[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v12, v6
-; GFX900-NEXT: v_mov_b32_e32 v13, v7
-; GFX900-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v12, v14
-; GFX900-NEXT: v_mov_b32_e32 v13, v15
-; GFX900-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v14
+; GFX900-NEXT: v_mov_b32_e32 v9, v15
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
+; GFX900-NEXT: v_mov_b32_e32 v16, v14
+; GFX900-NEXT: v_mov_b32_e32 v17, v15
+; GFX900-NEXT: global_store_dwordx4 v0, v[6:9], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v0, v[14:17], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -13005,17 +12637,16 @@ define void @v_shuffle_v4p0_v4p0__7_7_3_7(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v16, 0
+; GFX90A-NEXT: v_mov_b32_e32 v18, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, v14
+; GFX90A-NEXT: v_mov_b32_e32 v9, v15
+; GFX90A-NEXT: v_mov_b32_e32 v16, v14
+; GFX90A-NEXT: v_mov_b32_e32 v17, v15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v12, v6
-; GFX90A-NEXT: v_mov_b32_e32 v13, v7
-; GFX90A-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v12, v14
-; GFX90A-NEXT: v_mov_b32_e32 v13, v15
-; GFX90A-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v18, v[6:9], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v18, v[14:17], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -13025,18 +12656,16 @@ define void @v_shuffle_v4p0_v4p0__7_7_3_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v16, 0
+; GFX942-NEXT: v_mov_b32_e32 v18, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, v14
+; GFX942-NEXT: v_mov_b32_e32 v9, v15
+; GFX942-NEXT: v_mov_b32_e32 v16, v14
+; GFX942-NEXT: v_mov_b32_e32 v17, v15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v6
-; GFX942-NEXT: v_mov_b32_e32 v13, v7
-; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v12, v14
-; GFX942-NEXT: v_mov_b32_e32 v13, v15
-; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v18, v[6:9], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v18, v[14:17], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -13053,13 +12682,13 @@ define void @v_shuffle_v4p0_v4p0__7_7_4_7(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v6
; GFX900-NEXT: v_mov_b32_e32 v3, v7
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -13069,13 +12698,13 @@ define void @v_shuffle_v4p0_v4p0__7_7_4_7(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v6
; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -13085,13 +12714,13 @@ define void @v_shuffle_v4p0_v4p0__7_7_4_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v6
; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -13108,11 +12737,13 @@ define void @v_shuffle_v4p0_v4p0__7_7_5_7(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: v_mov_b32_e32 v4, v6
; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -13122,11 +12753,13 @@ define void @v_shuffle_v4p0_v4p0__7_7_5_7(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: v_mov_b32_e32 v4, v6
; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -13136,11 +12769,13 @@ define void @v_shuffle_v4p0_v4p0__7_7_5_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: v_mov_b32_e32 v4, v6
; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] offset:16
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] offset:16
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -13154,45 +12789,42 @@ define void @v_shuffle_v4p0_v4p0__7_7_6_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p0_v4p0__7_7_6_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX900-NEXT: s_nop 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, v6
+; GFX900-NEXT: v_mov_b32_e32 v9, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17] offset:16
+; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p0_v4p0__7_7_6_7:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
-; GFX90A-NEXT: s_nop 0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, v6
+; GFX90A-NEXT: v_mov_b32_e32 v9, v7
+; GFX90A-NEXT: global_store_dwordx4 v10, v[4:7], s[16:17] offset:16
+; GFX90A-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_7_6_7:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
-; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] offset:16
+; GFX942-NEXT: v_mov_b32_e32 v8, v6
+; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=v"()
@@ -13328,10 +12960,9 @@ define void @s_shuffle_v4p0_v4p0__2_u_u_u() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13474,10 +13105,9 @@ define void @s_shuffle_v4p0_v4p0__6_u_u_u() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13750,15 +13380,14 @@ define void @s_shuffle_v4p0_v4p0__7_3_u_u() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13895,242 +13524,435 @@ define void @s_shuffle_v4p0_v4p0__7_6_u_u() {
}
define void @s_shuffle_v4p0_v4p0__7_7_u_u() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_u_u:
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_7_u_u:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 poison>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_7_0_u() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_7_0_u:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 poison>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_7_1_u() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_1_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_u_u:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_1_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_u_u:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_1_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s8, s10
+; GFX942-NEXT: s_mov_b32 s9, s11
+; GFX942-NEXT: s_mov_b32 s13, s3
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 poison>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 poison>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_0_u() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_0_u:
+define void @s_shuffle_v4p0_v4p0__7_7_2_u() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_2_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_0_u:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_2_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_0_u:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_2_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_mov_b32 s8, s10
; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s1
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 poison>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 poison>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_1_u() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_1_u:
+define void @s_shuffle_v4p0_v4p0__7_7_3_u() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_3_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_1_u:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_3_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_1_u:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_3_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: s_mov_b32 s12, s14
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_mov_b32 s8, s10
; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b32 s13, s15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 poison>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 poison>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_2_u() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_2_u:
+define void @s_shuffle_v4p0_v4p0__7_7_4_u() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_7_4_u:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s12, s4
+; GFX9-NEXT: s_mov_b32 s13, s5
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 poison>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_7_5_u() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_7_5_u:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s12, s6
+; GFX9-NEXT: s_mov_b32 s13, s7
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 poison>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_7_6_u() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_7_6_u:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s14
+; GFX9-NEXT: s_mov_b32 s9, s15
+; GFX9-NEXT: s_mov_b32 s10, s14
+; GFX9-NEXT: s_mov_b32 s11, s15
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 poison>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_7_7_u() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_7_7_u:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 poison>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_7_7_0() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_7_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[16:23]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_2_u:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_7_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[16:23]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_2_u:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_7_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s14, s0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b32 s8, s10
+; GFX942-NEXT: s_mov_b32 s9, s11
+; GFX942-NEXT: s_mov_b32 s12, s10
+; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b32 s15, s1
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 poison>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 0>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_3_u() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_3_u:
+define void @s_shuffle_v4p0_v4p0__7_7_7_1() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_7_7_1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_7_7_2() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_7_7_2:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: s_mov_b32 s14, s16
+; GFX9-NEXT: s_mov_b32 s15, s17
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_7_7_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_7_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -14141,14 +13963,14 @@ define void @s_shuffle_v4p0_v4p0__7_7_3_u() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_3_u:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_7_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -14159,3681 +13981,180 @@ define void @s_shuffle_v4p0_v4p0__7_7_3_u() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_3_u:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_7_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b32 s8, s10
+; GFX942-NEXT: s_mov_b32 s9, s11
+; GFX942-NEXT: s_mov_b32 s12, s10
+; GFX942-NEXT: s_mov_b32 s13, s11
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 poison>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_4_u() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_4_u:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_4_u:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_4_u:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 poison>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__7_7_5_u() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_5_u:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_5_u:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_5_u:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 poison>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__7_7_6_u() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_7_6_u:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 poison>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__7_7_7_u() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_7_u:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_7_u:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_7_u:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 poison>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__7_7_7_0() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_7_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_7_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_7_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 0>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__7_7_7_1() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_7_1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_7_1:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_7_1:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__7_7_7_2() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_7_2:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_7_2:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_7_2:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 2>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__7_7_7_3() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_7_3:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s10, s22
-; GFX900-NEXT: s_mov_b32 s11, s23
-; GFX900-NEXT: s_mov_b32 s12, s22
-; GFX900-NEXT: s_mov_b32 s13, s23
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_7_3:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s10, s22
-; GFX90A-NEXT: s_mov_b32 s11, s23
-; GFX90A-NEXT: s_mov_b32 s12, s22
-; GFX90A-NEXT: s_mov_b32 s13, s23
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_7_3:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 3>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__7_7_7_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_7_4:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s10, s22
-; GFX900-NEXT: s_mov_b32 s11, s23
-; GFX900-NEXT: s_mov_b32 s12, s22
-; GFX900-NEXT: s_mov_b32 s13, s23
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_7_4:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s10, s22
-; GFX90A-NEXT: s_mov_b32 s11, s23
-; GFX90A-NEXT: s_mov_b32 s12, s22
-; GFX90A-NEXT: s_mov_b32 s13, s23
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_7_4:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 4>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__7_7_7_5() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_7_5:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s18
-; GFX900-NEXT: s_mov_b32 s11, s19
-; GFX900-NEXT: s_mov_b32 s12, s18
-; GFX900-NEXT: s_mov_b32 s13, s19
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_7_5:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s18
-; GFX90A-NEXT: s_mov_b32 s11, s19
-; GFX90A-NEXT: s_mov_b32 s12, s18
-; GFX90A-NEXT: s_mov_b32 s13, s19
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_7_5:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 5>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__7_7_7_6() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_7_6:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s18
-; GFX900-NEXT: s_mov_b32 s11, s19
-; GFX900-NEXT: s_mov_b32 s12, s18
-; GFX900-NEXT: s_mov_b32 s13, s19
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_7_6:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s18
-; GFX90A-NEXT: s_mov_b32 s11, s19
-; GFX90A-NEXT: s_mov_b32 s12, s18
-; GFX90A-NEXT: s_mov_b32 s13, s19
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_7_6:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 6>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__7_7_7_7() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_7_7_7:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__u_0_0_0() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__u_0_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__u_0_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__u_0_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 poison, i32 0, i32 0, i32 0>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__0_0_0_0() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__0_0_0_0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s8
-; GFX9-NEXT: s_mov_b32 s11, s9
-; GFX9-NEXT: s_mov_b32 s12, s8
-; GFX9-NEXT: s_mov_b32 s13, s9
-; GFX9-NEXT: s_mov_b32 s14, s8
-; GFX9-NEXT: s_mov_b32 s15, s9
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> zeroinitializer
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__1_0_0_0() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__1_0_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__1_0_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__1_0_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__2_0_0_0() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__2_0_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__2_0_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__2_0_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 2, i32 0, i32 0, i32 0>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__3_0_0_0() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__3_0_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__3_0_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__3_0_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 3, i32 0, i32 0, i32 0>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__4_0_0_0() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__4_0_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__4_0_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__4_0_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 4, i32 0, i32 0, i32 0>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__5_0_0_0() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__5_0_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__5_0_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__5_0_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 5, i32 0, i32 0, i32 0>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__6_0_0_0() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__6_0_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__6_0_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__6_0_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 6, i32 0, i32 0, i32 0>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__7_0_0_0() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_0_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_0_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_0_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 0, i32 0, i32 0>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__7_u_0_0() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_u_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_u_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_u_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 poison, i32 0, i32 0>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__7_1_0_0() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_1_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s6
-; GFX900-NEXT: s_mov_b32 s11, s7
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_1_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s6
-; GFX90A-NEXT: s_mov_b32 s11, s7
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_1_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 1, i32 0, i32 0>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__7_2_0_0() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_2_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s20
-; GFX900-NEXT: s_mov_b32 s11, s21
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_2_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s20
-; GFX90A-NEXT: s_mov_b32 s11, s21
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_2_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 2, i32 0, i32 0>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__7_3_0_0() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_3_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_3_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_3_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 3, i32 0, i32 0>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__7_4_0_0() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_4_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_4_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_4_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 4, i32 0, i32 0>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__7_5_0_0() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_5_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_5_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_5_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 5, i32 0, i32 0>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__7_6_0_0() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_6_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_6_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_6_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 6, i32 0, i32 0>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__7_7_0_0() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_0_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_0_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_0_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 0>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__7_7_u_0() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_u_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_u_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_u_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 0>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__7_7_1_0() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_1_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_1_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_1_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 0>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__7_7_2_0() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_2_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s20
-; GFX900-NEXT: s_mov_b32 s13, s21
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_2_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s20
-; GFX90A-NEXT: s_mov_b32 s13, s21
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_2_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 0>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__7_7_3_0() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_3_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s22
-; GFX900-NEXT: s_mov_b32 s13, s23
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_3_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s22
-; GFX90A-NEXT: s_mov_b32 s13, s23
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_3_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 0>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__7_7_4_0() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_4_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s18
-; GFX900-NEXT: s_mov_b32 s11, s19
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_4_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s18
-; GFX90A-NEXT: s_mov_b32 s11, s19
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_4_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 0>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__7_7_5_0() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_5_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s18
-; GFX900-NEXT: s_mov_b32 s11, s19
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_5_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s18
-; GFX90A-NEXT: s_mov_b32 s11, s19
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_5_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 0>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__7_7_6_0() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_6_0:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_6_0:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_6_0:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 0>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__u_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__u_1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 poison, i32 1, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__0_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__0_1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__1_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__1_1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__2_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__2_1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 2, i32 1, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__3_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__3_1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 3, i32 1, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__4_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__4_1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 4, i32 1, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__5_1_1_1() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__5_1_1_1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s12, s10
-; GFX900-NEXT: s_mov_b32 s13, s11
-; GFX900-NEXT: s_mov_b32 s14, s10
-; GFX900-NEXT: s_mov_b32 s15, s11
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__5_1_1_1:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s12, s10
-; GFX90A-NEXT: s_mov_b32 s13, s11
-; GFX90A-NEXT: s_mov_b32 s14, s10
-; GFX90A-NEXT: s_mov_b32 s15, s11
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__5_1_1_1:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 5, i32 1, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__6_1_1_1() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__6_1_1_1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s12, s10
-; GFX900-NEXT: s_mov_b32 s13, s11
-; GFX900-NEXT: s_mov_b32 s14, s10
-; GFX900-NEXT: s_mov_b32 s15, s11
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__6_1_1_1:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s12, s10
-; GFX90A-NEXT: s_mov_b32 s13, s11
-; GFX90A-NEXT: s_mov_b32 s14, s10
-; GFX90A-NEXT: s_mov_b32 s15, s11
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__6_1_1_1:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 6, i32 1, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__7_1_1_1() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_1_1_1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s12, s10
-; GFX900-NEXT: s_mov_b32 s13, s11
-; GFX900-NEXT: s_mov_b32 s14, s10
-; GFX900-NEXT: s_mov_b32 s15, s11
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_1_1_1:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s12, s10
-; GFX90A-NEXT: s_mov_b32 s13, s11
-; GFX90A-NEXT: s_mov_b32 s14, s10
-; GFX90A-NEXT: s_mov_b32 s15, s11
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_1_1_1:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 1, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__7_u_1_1() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_u_1_1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_u_1_1:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_u_1_1:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 poison, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__7_0_1_1() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_0_1_1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_0_1_1:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_0_1_1:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 0, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__7_2_1_1() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_2_1_1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_2_1_1:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_2_1_1:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 2, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__7_3_1_1() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_3_1_1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_3_1_1:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_3_1_1:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 3, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__7_4_1_1() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_4_1_1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_4_1_1:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_4_1_1:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 4, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__7_5_1_1() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_5_1_1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_5_1_1:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_5_1_1:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 5, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__7_6_1_1() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_6_1_1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_6_1_1:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_6_1_1:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 6, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__7_7_1_1() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_1_1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_1_1:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_1_1:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__7_7_u_1() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_u_1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_u_1:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_u_1:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s15, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__7_7_0_1() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_0_1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_0_1:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_0_1:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__7_7_2_1() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_2_1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_2_1:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_2_1:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__7_7_3_1() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_3_1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s18
-; GFX900-NEXT: s_mov_b32 s13, s19
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_3_1:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s18
-; GFX90A-NEXT: s_mov_b32 s13, s19
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_3_1:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+define void @s_shuffle_v4p0_v4p0__7_7_7_4() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_7_7_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: s_mov_b32 s14, s4
+; GFX9-NEXT: s_mov_b32 s15, s5
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 1>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_4_1() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_4_1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s18
-; GFX900-NEXT: s_mov_b32 s11, s19
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_4_1:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s18
-; GFX90A-NEXT: s_mov_b32 s11, s19
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_4_1:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s15, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+define void @s_shuffle_v4p0_v4p0__7_7_7_5() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_7_7_5:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: s_mov_b32 s14, s6
+; GFX9-NEXT: s_mov_b32 s15, s7
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 1>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_5_1() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_5_1:
+define void @s_shuffle_v4p0_v4p0__7_7_7_6() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_7_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s18
; GFX900-NEXT: s_mov_b32 s9, s19
; GFX900-NEXT: s_mov_b32 s10, s18
; GFX900-NEXT: s_mov_b32 s11, s19
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
+; GFX900-NEXT: s_mov_b32 s12, s18
+; GFX900-NEXT: s_mov_b32 s13, s19
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_5_1:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_7_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s18
; GFX90A-NEXT: s_mov_b32 s9, s19
; GFX90A-NEXT: s_mov_b32 s10, s18
; GFX90A-NEXT: s_mov_b32 s11, s19
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
+; GFX90A-NEXT: s_mov_b32 s12, s18
+; GFX90A-NEXT: s_mov_b32 s13, s19
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_5_1:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_7_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s10, s6
+; GFX942-NEXT: s_mov_b32 s11, s7
; GFX942-NEXT: s_mov_b32 s12, s6
; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b32 s14, s4
+; GFX942-NEXT: s_mov_b32 s15, s5
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 1>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 6>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_6_1() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_6_1:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_6_1:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_6_1:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+define void @s_shuffle_v4p0_v4p0__7_7_7_7() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_7_7_7:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: s_mov_b32 s14, s10
+; GFX9-NEXT: s_mov_b32 s15, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 1>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__u_2_2_2() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__u_2_2_2:
+define void @s_shuffle_v4p0_v4p0__u_0_0_0() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__u_0_0_0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ; def s[12:19]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_mov_b32 s10, s12
; GFX9-NEXT: s_mov_b32 s11, s13
@@ -17844,41 +14165,43 @@ define void @s_shuffle_v4p0_v4p0__u_2_2_2() {
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 poison, i32 2, i32 2, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 poison, i32 0, i32 0, i32 0>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__0_2_2_2() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__0_2_2_2:
+define void @s_shuffle_v4p0_v4p0__0_0_0_0() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__0_0_0_0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; def s[8:15]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s12, s8
+; GFX9-NEXT: s_mov_b32 s13, s9
+; GFX9-NEXT: s_mov_b32 s14, s8
+; GFX9-NEXT: s_mov_b32 s15, s9
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:15]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 0, i32 2, i32 2, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> zeroinitializer
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__1_2_2_2() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__1_2_2_2:
+define void @s_shuffle_v4p0_v4p0__1_0_0_0() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__1_0_0_0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ; def s[12:19]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s8, s14
+; GFX9-NEXT: s_mov_b32 s9, s15
; GFX9-NEXT: s_mov_b32 s10, s12
; GFX9-NEXT: s_mov_b32 s11, s13
; GFX9-NEXT: s_mov_b32 s14, s12
@@ -17888,43 +14211,43 @@ define void @s_shuffle_v4p0_v4p0__1_2_2_2() {
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 1, i32 2, i32 2, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__2_2_2_2() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__2_2_2_2:
+define void @s_shuffle_v4p0_v4p0__2_0_0_0() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__2_0_0_0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ; def s[4:11]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
+; GFX9-NEXT: s_mov_b32 s10, s4
+; GFX9-NEXT: s_mov_b32 s11, s5
+; GFX9-NEXT: s_mov_b32 s12, s4
+; GFX9-NEXT: s_mov_b32 s13, s5
+; GFX9-NEXT: s_mov_b32 s14, s4
+; GFX9-NEXT: s_mov_b32 s15, s5
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:15]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 2, i32 0, i32 0, i32 0>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__3_2_2_2() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__3_2_2_2:
+define void @s_shuffle_v4p0_v4p0__3_0_0_0() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__3_0_0_0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ; def s[12:19]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
+; GFX9-NEXT: s_mov_b32 s8, s18
+; GFX9-NEXT: s_mov_b32 s9, s19
; GFX9-NEXT: s_mov_b32 s10, s12
; GFX9-NEXT: s_mov_b32 s11, s13
; GFX9-NEXT: s_mov_b32 s14, s12
@@ -17934,17 +14257,17 @@ define void @s_shuffle_v4p0_v4p0__3_2_2_2() {
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 3, i32 2, i32 2, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 3, i32 0, i32 0, i32 0>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__4_2_2_2() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__4_2_2_2:
+define void @s_shuffle_v4p0_v4p0__4_0_0_0() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__4_0_0_0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ; def s[12:19]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_mov_b32 s10, s12
; GFX9-NEXT: s_mov_b32 s11, s13
@@ -17955,17 +14278,17 @@ define void @s_shuffle_v4p0_v4p0__4_2_2_2() {
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 4, i32 2, i32 2, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 4, i32 0, i32 0, i32 0>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__5_2_2_2() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__5_2_2_2:
+define void @s_shuffle_v4p0_v4p0__5_0_0_0() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__5_0_0_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
@@ -17981,11 +14304,11 @@ define void @s_shuffle_v4p0_v4p0__5_2_2_2() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__5_2_2_2:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__5_0_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
@@ -18001,11 +14324,11 @@ define void @s_shuffle_v4p0_v4p0__5_2_2_2() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__5_2_2_2:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__5_0_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
@@ -18022,21 +14345,48 @@ define void @s_shuffle_v4p0_v4p0__5_2_2_2() {
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 5, i32 2, i32 2, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 5, i32 0, i32 0, i32 0>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__6_2_2_2() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__6_2_2_2:
+define void @s_shuffle_v4p0_v4p0__6_0_0_0() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__6_0_0_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: s_mov_b32 s14, s12
+; GFX9-NEXT: s_mov_b32 s15, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 6, i32 0, i32 0, i32 0>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_0_0_0() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_0_0_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
; GFX900-NEXT: s_mov_b32 s10, s12
; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: s_mov_b32 s14, s12
@@ -18046,15 +14396,17 @@ define void @s_shuffle_v4p0_v4p0__6_2_2_2() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__6_2_2_2:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_0_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
; GFX90A-NEXT: s_mov_b32 s10, s12
; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: s_mov_b32 s14, s12
@@ -18064,17 +14416,17 @@ define void @s_shuffle_v4p0_v4p0__6_2_2_2() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__6_2_2_2:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_0_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
; GFX942-NEXT: s_mov_b32 s10, s12
; GFX942-NEXT: s_mov_b32 s11, s13
; GFX942-NEXT: s_mov_b32 s14, s12
@@ -18085,25 +14437,23 @@ define void @s_shuffle_v4p0_v4p0__6_2_2_2() {
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 6, i32 2, i32 2, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 0, i32 0, i32 0>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_2_2_2() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_2_2_2:
+define void @s_shuffle_v4p0_v4p0__7_u_0_0() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_u_0_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: s_mov_b32 s14, s12
; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
@@ -18111,19 +14461,17 @@ define void @s_shuffle_v4p0_v4p0__7_2_2_2() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_2_2_2:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_u_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: s_mov_b32 s14, s12
; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
@@ -18131,19 +14479,17 @@ define void @s_shuffle_v4p0_v4p0__7_2_2_2() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_2_2_2:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_u_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_mov_b32 s8, s6
; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
; GFX942-NEXT: s_mov_b32 s14, s12
; GFX942-NEXT: s_mov_b32 s15, s13
; GFX942-NEXT: ;;#ASMSTART
@@ -18152,23 +14498,186 @@ define void @s_shuffle_v4p0_v4p0__7_2_2_2() {
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 2, i32 2, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 poison, i32 0, i32 0>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_1_0_0() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_1_0_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_1_0_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_1_0_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s10, s14
+; GFX942-NEXT: s_mov_b32 s11, s15
+; GFX942-NEXT: s_mov_b32 s14, s12
+; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 1, i32 0, i32 0>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_2_0_0() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_2_0_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_2_0_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_2_0_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s10, s16
+; GFX942-NEXT: s_mov_b32 s11, s17
+; GFX942-NEXT: s_mov_b32 s14, s12
+; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 2, i32 0, i32 0>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_3_0_0() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_3_0_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s18
+; GFX9-NEXT: s_mov_b32 s9, s19
+; GFX9-NEXT: s_mov_b32 s12, s4
+; GFX9-NEXT: s_mov_b32 s13, s5
+; GFX9-NEXT: s_mov_b32 s14, s4
+; GFX9-NEXT: s_mov_b32 s15, s5
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 3, i32 0, i32 0>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_u_2_2() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_u_2_2:
+define void @s_shuffle_v4p0_v4p0__7_4_0_0() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_4_0_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: s_mov_b32 s14, s12
; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
@@ -18176,17 +14685,19 @@ define void @s_shuffle_v4p0_v4p0__7_u_2_2() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_u_2_2:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_4_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: s_mov_b32 s14, s12
; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
@@ -18194,17 +14705,19 @@ define void @s_shuffle_v4p0_v4p0__7_u_2_2() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_u_2_2:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_4_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_mov_b32 s8, s6
; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b32 s11, s1
; GFX942-NEXT: s_mov_b32 s14, s12
; GFX942-NEXT: s_mov_b32 s15, s13
; GFX942-NEXT: ;;#ASMSTART
@@ -18213,57 +14726,53 @@ define void @s_shuffle_v4p0_v4p0__7_u_2_2() {
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 poison, i32 2, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 4, i32 0, i32 0>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_0_2_2() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_0_2_2:
+define void @s_shuffle_v4p0_v4p0__7_5_0_0() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_5_0_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s14, s4
+; GFX900-NEXT: s_mov_b32 s15, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_0_2_2:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_5_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s14, s4
+; GFX90A-NEXT: s_mov_b32 s15, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_0_2_2:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_5_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -18274,35 +14783,35 @@ define void @s_shuffle_v4p0_v4p0__7_0_2_2() {
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_mov_b32 s8, s14
; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b32 s14, s0
+; GFX942-NEXT: s_mov_b32 s15, s1
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 0, i32 2, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 5, i32 0, i32 0>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_1_2_2() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_1_2_2:
+define void @s_shuffle_v4p0_v4p0__7_6_0_0() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_6_0_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[16:23]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s22
; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s10, s20
+; GFX900-NEXT: s_mov_b32 s11, s21
; GFX900-NEXT: s_mov_b32 s14, s12
; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
@@ -18310,17 +14819,19 @@ define void @s_shuffle_v4p0_v4p0__7_1_2_2() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_1_2_2:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_6_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[16:23]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s22
; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s10, s20
+; GFX90A-NEXT: s_mov_b32 s11, s21
; GFX90A-NEXT: s_mov_b32 s14, s12
; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
@@ -18328,17 +14839,19 @@ define void @s_shuffle_v4p0_v4p0__7_1_2_2() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_1_2_2:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_6_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_mov_b32 s8, s6
; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s10, s4
+; GFX942-NEXT: s_mov_b32 s11, s5
; GFX942-NEXT: s_mov_b32 s14, s12
; GFX942-NEXT: s_mov_b32 s15, s13
; GFX942-NEXT: ;;#ASMSTART
@@ -18347,25 +14860,48 @@ define void @s_shuffle_v4p0_v4p0__7_1_2_2() {
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 1, i32 2, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 6, i32 0, i32 0>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_3_2_2() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_3_2_2:
+define void @s_shuffle_v4p0_v4p0__7_7_0_0() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_7_0_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s14, s12
+; GFX9-NEXT: s_mov_b32 s15, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 0>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_7_u_0() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_u_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
; GFX900-NEXT: s_mov_b32 s14, s12
; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
@@ -18373,19 +14909,17 @@ define void @s_shuffle_v4p0_v4p0__7_3_2_2() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_3_2_2:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_u_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
; GFX90A-NEXT: s_mov_b32 s14, s12
; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
@@ -18393,113 +14927,165 @@ define void @s_shuffle_v4p0_v4p0__7_3_2_2() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_3_2_2:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_u_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s14, s0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b32 s8, s10
+; GFX942-NEXT: s_mov_b32 s9, s11
+; GFX942-NEXT: s_mov_b32 s15, s1
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 3, i32 2, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 0>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_4_2_2() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_4_2_2:
+define void @s_shuffle_v4p0_v4p0__7_7_1_0() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_1_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[16:23]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s14, s12
-; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: s_mov_b32 s12, s18
+; GFX900-NEXT: s_mov_b32 s13, s19
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_4_2_2:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_1_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[16:23]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s14, s12
-; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: s_mov_b32 s12, s18
+; GFX90A-NEXT: s_mov_b32 s13, s19
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_4_2_2:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_1_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s12, s2
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b32 s8, s10
+; GFX942-NEXT: s_mov_b32 s9, s11
+; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b32 s14, s0
+; GFX942-NEXT: s_mov_b32 s15, s1
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 4, i32 2, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 0>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_7_2_0() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_7_2_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[16:23]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s12, s20
+; GFX9-NEXT: s_mov_b32 s13, s21
+; GFX9-NEXT: s_mov_b32 s14, s16
+; GFX9-NEXT: s_mov_b32 s15, s17
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 0>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_7_3_0() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_7_3_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[16:23]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s12, s22
+; GFX9-NEXT: s_mov_b32 s13, s23
+; GFX9-NEXT: s_mov_b32 s14, s16
+; GFX9-NEXT: s_mov_b32 s15, s17
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 0>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_5_2_2() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_5_2_2:
+define void @s_shuffle_v4p0_v4p0__7_7_4_0() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_4_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[16:23]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
; GFX900-NEXT: s_mov_b32 s14, s16
; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
@@ -18507,19 +15093,19 @@ define void @s_shuffle_v4p0_v4p0__7_5_2_2() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_5_2_2:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_4_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[16:23]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
; GFX90A-NEXT: s_mov_b32 s14, s16
; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
@@ -18527,137 +15113,141 @@ define void @s_shuffle_v4p0_v4p0__7_5_2_2() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_5_2_2:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_4_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s14, s0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
+; GFX942-NEXT: s_mov_b32 s8, s10
+; GFX942-NEXT: s_mov_b32 s9, s11
; GFX942-NEXT: s_mov_b32 s12, s4
; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b32 s15, s1
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 5, i32 2, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 0>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_6_2_2() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_6_2_2:
+define void @s_shuffle_v4p0_v4p0__7_7_5_0() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_5_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[16:23]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s10, s20
-; GFX900-NEXT: s_mov_b32 s11, s21
-; GFX900-NEXT: s_mov_b32 s14, s12
-; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_6_2_2:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_5_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[16:23]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s10, s20
-; GFX90A-NEXT: s_mov_b32 s11, s21
-; GFX90A-NEXT: s_mov_b32 s14, s12
-; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_6_2_2:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_5_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s14, s0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b32 s8, s10
+; GFX942-NEXT: s_mov_b32 s9, s11
+; GFX942-NEXT: s_mov_b32 s12, s6
+; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b32 s15, s1
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 6, i32 2, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 0>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_2_2() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_2_2:
+define void @s_shuffle_v4p0_v4p0__7_7_6_0() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_6_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s14, s12
-; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s14, s4
+; GFX900-NEXT: s_mov_b32 s15, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_2_2:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_6_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s14, s12
-; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s14, s4
+; GFX90A-NEXT: s_mov_b32 s15, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_2_2:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_6_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -18666,124 +15256,197 @@ define void @s_shuffle_v4p0_v4p0__7_7_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b32 s8, s14
+; GFX942-NEXT: s_mov_b32 s9, s15
+; GFX942-NEXT: s_mov_b32 s10, s14
+; GFX942-NEXT: s_mov_b32 s11, s15
+; GFX942-NEXT: s_mov_b32 s14, s0
+; GFX942-NEXT: s_mov_b32 s15, s1
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 2>
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 0>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__u_1_1_1() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__u_1_1_1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: s_mov_b32 s14, s10
+; GFX9-NEXT: s_mov_b32 s15, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 poison, i32 1, i32 1, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__0_1_1_1() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__0_1_1_1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: s_mov_b32 s14, s10
+; GFX9-NEXT: s_mov_b32 s15, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__1_1_1_1() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__1_1_1_1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: s_mov_b32 s14, s10
+; GFX9-NEXT: s_mov_b32 s15, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__2_1_1_1() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__2_1_1_1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s6
+; GFX9-NEXT: s_mov_b32 s11, s7
+; GFX9-NEXT: s_mov_b32 s12, s6
+; GFX9-NEXT: s_mov_b32 s13, s7
+; GFX9-NEXT: s_mov_b32 s14, s6
+; GFX9-NEXT: s_mov_b32 s15, s7
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 2, i32 1, i32 1, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__3_1_1_1() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__3_1_1_1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s14
+; GFX9-NEXT: s_mov_b32 s9, s15
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: s_mov_b32 s14, s10
+; GFX9-NEXT: s_mov_b32 s15, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 3, i32 1, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_u_2() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_u_2:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s14, s12
-; GFX900-NEXT: s_mov_b32 s15, s13
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_u_2:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s14, s12
-; GFX90A-NEXT: s_mov_b32 s15, s13
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_u_2:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+define void @s_shuffle_v4p0_v4p0__4_1_1_1() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__4_1_1_1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: s_mov_b32 s14, s10
+; GFX9-NEXT: s_mov_b32 s15, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 4, i32 1, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_0_2() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_0_2:
+define void @s_shuffle_v4p0_v4p0__5_1_1_1() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__5_1_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_0_2:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__5_1_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_0_2:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__5_1_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -18792,165 +15455,117 @@ define void @s_shuffle_v4p0_v4p0__7_7_0_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b32 s8, s2
+; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b32 s12, s10
+; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b32 s14, s10
+; GFX942-NEXT: s_mov_b32 s15, s11
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 5, i32 1, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_1_2() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_1_2:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_1_2:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_1_2:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+define void @s_shuffle_v4p0_v4p0__6_1_1_1() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__6_1_1_1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s14
+; GFX9-NEXT: s_mov_b32 s11, s15
+; GFX9-NEXT: s_mov_b32 s12, s14
+; GFX9-NEXT: s_mov_b32 s13, s15
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 6, i32 1, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_3_2() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_3_2:
+define void @s_shuffle_v4p0_v4p0__7_1_1_1() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_1_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s18
-; GFX900-NEXT: s_mov_b32 s13, s19
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_3_2:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_1_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s18
-; GFX90A-NEXT: s_mov_b32 s13, s19
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_3_2:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_1_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
-; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s12, s10
+; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b32 s14, s10
+; GFX942-NEXT: s_mov_b32 s15, s11
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 1, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_4_2() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_4_2:
+define void @s_shuffle_v4p0_v4p0__7_u_1_1() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_u_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -18961,16 +15576,14 @@ define void @s_shuffle_v4p0_v4p0__7_7_4_2() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_4_2:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_u_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -18981,16 +15594,14 @@ define void @s_shuffle_v4p0_v4p0__7_7_4_2() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_4_2:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_u_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -18999,25 +15610,23 @@ define void @s_shuffle_v4p0_v4p0__7_7_4_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s18
-; GFX942-NEXT: s_mov_b32 s11, s19
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s12, s14
+; GFX942-NEXT: s_mov_b32 s13, s15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 poison, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_5_2() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_5_2:
+define void @s_shuffle_v4p0_v4p0__7_0_1_1() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_0_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -19028,16 +15637,16 @@ define void @s_shuffle_v4p0_v4p0__7_7_5_2() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_5_2:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_0_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -19048,16 +15657,16 @@ define void @s_shuffle_v4p0_v4p0__7_7_5_2() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_5_2:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_0_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -19066,238 +15675,131 @@ define void @s_shuffle_v4p0_v4p0__7_7_5_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s18
-; GFX942-NEXT: s_mov_b32 s11, s19
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s10, s12
+; GFX942-NEXT: s_mov_b32 s11, s13
; GFX942-NEXT: s_mov_b32 s12, s14
; GFX942-NEXT: s_mov_b32 s13, s15
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 0, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_6_2() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_6_2:
+define void @s_shuffle_v4p0_v4p0__7_2_1_1() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_2_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_6_2:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_2_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_6_2:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_2_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s10, s16
+; GFX942-NEXT: s_mov_b32 s11, s17
+; GFX942-NEXT: s_mov_b32 s12, s14
+; GFX942-NEXT: s_mov_b32 s13, s15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 2>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__u_3_3_3() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__u_3_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 poison, i32 3, i32 3, i32 3>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__0_3_3_3() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__0_3_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 0, i32 3, i32 3, i32 3>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__1_3_3_3() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__1_3_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 1, i32 3, i32 3, i32 3>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__2_3_3_3() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__2_3_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 2, i32 3, i32 3, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 2, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__3_3_3_3() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__3_3_3_3:
+define void @s_shuffle_v4p0_v4p0__7_3_1_1() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_3_1_1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ; def s[4:11]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__4_3_3_3() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__4_3_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ; def s[12:19]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
+; GFX9-NEXT: s_mov_b32 s8, s18
+; GFX9-NEXT: s_mov_b32 s9, s19
+; GFX9-NEXT: s_mov_b32 s12, s6
+; GFX9-NEXT: s_mov_b32 s13, s7
+; GFX9-NEXT: s_mov_b32 s14, s6
+; GFX9-NEXT: s_mov_b32 s15, s7
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:15]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 4, i32 3, i32 3, i32 3>
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 3, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__5_3_3_3() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__5_3_3_3:
+define void @s_shuffle_v4p0_v4p0__7_4_1_1() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_4_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: s_mov_b32 s12, s14
; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
@@ -19305,19 +15807,19 @@ define void @s_shuffle_v4p0_v4p0__5_3_3_3() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__5_3_3_3:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_4_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: s_mov_b32 s12, s14
; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
@@ -19325,19 +15827,19 @@ define void @s_shuffle_v4p0_v4p0__5_3_3_3() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__5_3_3_3:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_4_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b32 s11, s1
; GFX942-NEXT: s_mov_b32 s12, s14
; GFX942-NEXT: s_mov_b32 s13, s15
; GFX942-NEXT: ;;#ASMSTART
@@ -19346,49 +15848,53 @@ define void @s_shuffle_v4p0_v4p0__5_3_3_3() {
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 5, i32 3, i32 3, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 4, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__6_3_3_3() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__6_3_3_3:
+define void @s_shuffle_v4p0_v4p0__7_5_1_1() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_5_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s14, s6
+; GFX900-NEXT: s_mov_b32 s15, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__6_3_3_3:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_5_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s14, s6
+; GFX90A-NEXT: s_mov_b32 s15, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__6_3_3_3:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_5_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -19397,37 +15903,37 @@ define void @s_shuffle_v4p0_v4p0__6_3_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b32 s8, s14
+; GFX942-NEXT: s_mov_b32 s9, s15
+; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b32 s14, s2
+; GFX942-NEXT: s_mov_b32 s15, s3
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 6, i32 3, i32 3, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 5, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_3_3_3() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_3_3_3:
+define void @s_shuffle_v4p0_v4p0__7_6_1_1() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_6_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[16:23]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s10, s20
+; GFX900-NEXT: s_mov_b32 s11, s21
; GFX900-NEXT: s_mov_b32 s12, s14
; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
@@ -19435,19 +15941,19 @@ define void @s_shuffle_v4p0_v4p0__7_3_3_3() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_3_3_3:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_6_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[16:23]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s10, s20
+; GFX90A-NEXT: s_mov_b32 s11, s21
; GFX90A-NEXT: s_mov_b32 s12, s14
; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
@@ -19455,19 +15961,19 @@ define void @s_shuffle_v4p0_v4p0__7_3_3_3() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_3_3_3:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_6_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_mov_b32 s8, s6
; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
+; GFX942-NEXT: s_mov_b32 s10, s4
+; GFX942-NEXT: s_mov_b32 s11, s5
; GFX942-NEXT: s_mov_b32 s12, s14
; GFX942-NEXT: s_mov_b32 s13, s15
; GFX942-NEXT: ;;#ASMSTART
@@ -19476,49 +15982,224 @@ define void @s_shuffle_v4p0_v4p0__7_3_3_3() {
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 3, i32 3, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 6, i32 1, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_7_1_1() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_7_1_1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s12, s14
+; GFX9-NEXT: s_mov_b32 s13, s15
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_7_u_1() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_7_u_1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_7_0_1() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_7_0_1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_7_2_1() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_7_2_1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s12, s16
+; GFX9-NEXT: s_mov_b32 s13, s17
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_7_3_1() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_7_3_1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s12, s18
+; GFX9-NEXT: s_mov_b32 s13, s19
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_7_4_1() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_7_4_1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s12, s4
+; GFX9-NEXT: s_mov_b32 s13, s5
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_7_5_1() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_7_5_1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s12, s6
+; GFX9-NEXT: s_mov_b32 s13, s7
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_u_3_3() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_u_3_3:
+define void @s_shuffle_v4p0_v4p0__7_7_6_1() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_6_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s14, s6
+; GFX900-NEXT: s_mov_b32 s15, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_u_3_3:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_6_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s14, s6
+; GFX90A-NEXT: s_mov_b32 s15, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_u_3_3:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_6_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -19527,67 +16208,197 @@ define void @s_shuffle_v4p0_v4p0__7_u_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b32 s8, s14
+; GFX942-NEXT: s_mov_b32 s9, s15
+; GFX942-NEXT: s_mov_b32 s10, s14
+; GFX942-NEXT: s_mov_b32 s11, s15
+; GFX942-NEXT: s_mov_b32 s14, s2
+; GFX942-NEXT: s_mov_b32 s15, s3
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 poison, i32 3, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_0_3_3() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_0_3_3:
+define void @s_shuffle_v4p0_v4p0__u_2_2_2() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__u_2_2_2:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: s_mov_b32 s14, s12
+; GFX9-NEXT: s_mov_b32 s15, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 poison, i32 2, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__0_2_2_2() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__0_2_2_2:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: s_mov_b32 s14, s12
+; GFX9-NEXT: s_mov_b32 s15, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 0, i32 2, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__1_2_2_2() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__1_2_2_2:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: s_mov_b32 s14, s12
+; GFX9-NEXT: s_mov_b32 s15, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 1, i32 2, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__2_2_2_2() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__2_2_2_2:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s12, s8
+; GFX9-NEXT: s_mov_b32 s13, s9
+; GFX9-NEXT: s_mov_b32 s14, s8
+; GFX9-NEXT: s_mov_b32 s15, s9
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__3_2_2_2() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__3_2_2_2:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s14
+; GFX9-NEXT: s_mov_b32 s9, s15
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: s_mov_b32 s14, s12
+; GFX9-NEXT: s_mov_b32 s15, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 3, i32 2, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__4_2_2_2() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__4_2_2_2:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: s_mov_b32 s14, s12
+; GFX9-NEXT: s_mov_b32 s15, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 4, i32 2, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__5_2_2_2() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__5_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: s_mov_b32 s10, s12
; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s18
-; GFX900-NEXT: s_mov_b32 s13, s19
-; GFX900-NEXT: s_mov_b32 s14, s18
-; GFX900-NEXT: s_mov_b32 s15, s19
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_0_3_3:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__5_2_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: s_mov_b32 s10, s12
; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s18
-; GFX90A-NEXT: s_mov_b32 s13, s19
-; GFX90A-NEXT: s_mov_b32 s14, s18
-; GFX90A-NEXT: s_mov_b32 s15, s19
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_0_3_3:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__5_2_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -19596,88 +16407,86 @@ define void @s_shuffle_v4p0_v4p0__7_0_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b32 s8, s2
+; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b32 s10, s12
+; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b32 s14, s12
+; GFX942-NEXT: s_mov_b32 s15, s13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 0, i32 3, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 5, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_1_3_3() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_1_3_3:
+define void @s_shuffle_v4p0_v4p0__6_2_2_2() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__6_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_1_3_3:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__6_2_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_1_3_3:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__6_2_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s14, s12
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b32 s10, s12
+; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b32 s15, s13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 1, i32 3, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 6, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_2_3_3() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_2_3_3:
+define void @s_shuffle_v4p0_v4p0__7_2_2_2() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -19690,14 +16499,14 @@ define void @s_shuffle_v4p0_v4p0__7_2_3_3() {
; GFX900-NEXT: s_mov_b32 s9, s11
; GFX900-NEXT: s_mov_b32 s10, s12
; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_2_3_3:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_2_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -19710,14 +16519,14 @@ define void @s_shuffle_v4p0_v4p0__7_2_3_3() {
; GFX90A-NEXT: s_mov_b32 s9, s11
; GFX90A-NEXT: s_mov_b32 s10, s12
; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_2_3_3:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_2_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -19730,21 +16539,21 @@ define void @s_shuffle_v4p0_v4p0__7_2_3_3() {
; GFX942-NEXT: s_mov_b32 s9, s7
; GFX942-NEXT: s_mov_b32 s10, s12
; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b32 s14, s12
+; GFX942-NEXT: s_mov_b32 s15, s13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 2, i32 3, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_4_3_3() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_4_3_3:
+define void @s_shuffle_v4p0_v4p0__7_u_2_2() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_u_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -19755,16 +16564,14 @@ define void @s_shuffle_v4p0_v4p0__7_4_3_3() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_4_3_3:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_u_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -19775,16 +16582,14 @@ define void @s_shuffle_v4p0_v4p0__7_4_3_3() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_4_3_3:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_u_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -19795,63 +16600,65 @@ define void @s_shuffle_v4p0_v4p0__7_4_3_3() {
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_mov_b32 s8, s6
; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b32 s14, s12
+; GFX942-NEXT: s_mov_b32 s15, s13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 4, i32 3, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 poison, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_5_3_3() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_5_3_3:
+define void @s_shuffle_v4p0_v4p0__7_0_2_2() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_0_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s12, s18
-; GFX900-NEXT: s_mov_b32 s13, s19
-; GFX900-NEXT: s_mov_b32 s14, s18
-; GFX900-NEXT: s_mov_b32 s15, s19
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_5_3_3:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_0_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s12, s18
-; GFX90A-NEXT: s_mov_b32 s13, s19
-; GFX90A-NEXT: s_mov_b32 s14, s18
-; GFX90A-NEXT: s_mov_b32 s15, s19
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_5_3_3:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_0_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -19862,23 +16669,25 @@ define void @s_shuffle_v4p0_v4p0__7_5_3_3() {
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_mov_b32 s8, s14
; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b32 s12, s4
+; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b32 s14, s4
+; GFX942-NEXT: s_mov_b32 s15, s5
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 5, i32 3, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 0, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_6_3_3() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_6_3_3:
+define void @s_shuffle_v4p0_v4p0__7_1_2_2() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_1_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -19889,16 +16698,14 @@ define void @s_shuffle_v4p0_v4p0__7_6_3_3() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s22
; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s10, s20
-; GFX900-NEXT: s_mov_b32 s11, s21
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_6_3_3:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_1_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -19909,16 +16716,14 @@ define void @s_shuffle_v4p0_v4p0__7_6_3_3() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s22
; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s10, s20
-; GFX90A-NEXT: s_mov_b32 s11, s21
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_6_3_3:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_1_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -19929,23 +16734,21 @@ define void @s_shuffle_v4p0_v4p0__7_6_3_3() {
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_mov_b32 s8, s6
; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b32 s14, s12
+; GFX942-NEXT: s_mov_b32 s15, s13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 6, i32 3, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 1, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_3_3() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_3_3:
+define void @s_shuffle_v4p0_v4p0__7_3_2_2() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_3_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -19956,14 +16759,16 @@ define void @s_shuffle_v4p0_v4p0__7_7_3_3() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_3_3:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_3_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -19974,14 +16779,16 @@ define void @s_shuffle_v4p0_v4p0__7_7_3_3() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_3_3:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_3_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -19992,23 +16799,23 @@ define void @s_shuffle_v4p0_v4p0__7_7_3_3() {
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_mov_b32 s8, s6
; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b32 s10, s14
+; GFX942-NEXT: s_mov_b32 s11, s15
+; GFX942-NEXT: s_mov_b32 s14, s12
+; GFX942-NEXT: s_mov_b32 s15, s13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 3, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_u_3() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_u_3:
+define void @s_shuffle_v4p0_v4p0__7_4_2_2() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_4_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -20019,12 +16826,16 @@ define void @s_shuffle_v4p0_v4p0__7_7_u_3() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_u_3:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_4_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -20035,12 +16846,16 @@ define void @s_shuffle_v4p0_v4p0__7_7_u_3() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_u_3:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_4_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -20051,126 +16866,63 @@ define void @s_shuffle_v4p0_v4p0__7_7_u_3() {
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_mov_b32 s8, s6
; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 3>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__7_7_0_3() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_0_3:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s14, s18
-; GFX900-NEXT: s_mov_b32 s15, s19
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_0_3:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s14, s18
-; GFX90A-NEXT: s_mov_b32 s15, s19
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_0_3:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b32 s14, s12
+; GFX942-NEXT: s_mov_b32 s15, s13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 4, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_1_3() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_1_3:
+define void @s_shuffle_v4p0_v4p0__7_5_2_2() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_5_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
-; GFX900-NEXT: s_mov_b32 s14, s18
-; GFX900-NEXT: s_mov_b32 s15, s19
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_1_3:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_5_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
-; GFX90A-NEXT: s_mov_b32 s14, s18
-; GFX90A-NEXT: s_mov_b32 s15, s19
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_1_3:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_5_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -20181,57 +16933,63 @@ define void @s_shuffle_v4p0_v4p0__7_7_1_3() {
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_mov_b32 s8, s14
; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b32 s12, s4
+; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b32 s14, s4
+; GFX942-NEXT: s_mov_b32 s15, s5
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 5, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_2_3() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_2_3:
+define void @s_shuffle_v4p0_v4p0__7_6_2_2() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_6_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[16:23]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s10, s20
+; GFX900-NEXT: s_mov_b32 s11, s21
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_2_3:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_6_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[16:23]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s10, s20
+; GFX90A-NEXT: s_mov_b32 s11, s21
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_2_3:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_6_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -20242,21 +17000,23 @@ define void @s_shuffle_v4p0_v4p0__7_7_2_3() {
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_mov_b32 s8, s6
; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b32 s10, s4
+; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b32 s14, s12
+; GFX942-NEXT: s_mov_b32 s15, s13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 6, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_4_3() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_4_3:
+define void @s_shuffle_v4p0_v4p0__7_7_2_2() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -20267,14 +17027,14 @@ define void @s_shuffle_v4p0_v4p0__7_7_4_3() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_4_3:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -20285,41 +17045,39 @@ define void @s_shuffle_v4p0_v4p0__7_7_4_3() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_4_3:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s14, s12
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b32 s8, s10
+; GFX942-NEXT: s_mov_b32 s9, s11
+; GFX942-NEXT: s_mov_b32 s15, s13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_5_3() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_5_3:
+define void @s_shuffle_v4p0_v4p0__7_7_u_2() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_u_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -20330,14 +17088,14 @@ define void @s_shuffle_v4p0_v4p0__7_7_5_3() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_5_3:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_u_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -20348,41 +17106,172 @@ define void @s_shuffle_v4p0_v4p0__7_7_5_3() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_5_3:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_u_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s14, s12
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b32 s8, s10
+; GFX942-NEXT: s_mov_b32 s9, s11
+; GFX942-NEXT: s_mov_b32 s15, s13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_6_3() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_6_3:
+define void @s_shuffle_v4p0_v4p0__7_7_0_2() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_7_0_2:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s14, s16
+; GFX9-NEXT: s_mov_b32 s15, s17
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_7_1_2() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_7_1_2:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s12, s14
+; GFX9-NEXT: s_mov_b32 s13, s15
+; GFX9-NEXT: s_mov_b32 s14, s16
+; GFX9-NEXT: s_mov_b32 s15, s17
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_7_3_2() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_7_3_2:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s12, s18
+; GFX9-NEXT: s_mov_b32 s13, s19
+; GFX9-NEXT: s_mov_b32 s14, s16
+; GFX9-NEXT: s_mov_b32 s15, s17
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_7_4_2() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_7_4_2:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s12, s4
+; GFX9-NEXT: s_mov_b32 s13, s5
+; GFX9-NEXT: s_mov_b32 s14, s16
+; GFX9-NEXT: s_mov_b32 s15, s17
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_7_5_2() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_7_5_2:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s12, s6
+; GFX9-NEXT: s_mov_b32 s13, s7
+; GFX9-NEXT: s_mov_b32 s14, s16
+; GFX9-NEXT: s_mov_b32 s15, s17
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_7_6_2() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_6_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -20395,14 +17284,14 @@ define void @s_shuffle_v4p0_v4p0__7_7_6_3() {
; GFX900-NEXT: s_mov_b32 s9, s15
; GFX900-NEXT: s_mov_b32 s10, s14
; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s14, s18
-; GFX900-NEXT: s_mov_b32 s15, s19
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_6_3:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_6_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -20415,14 +17304,14 @@ define void @s_shuffle_v4p0_v4p0__7_7_6_3() {
; GFX90A-NEXT: s_mov_b32 s9, s15
; GFX90A-NEXT: s_mov_b32 s10, s14
; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s14, s18
-; GFX90A-NEXT: s_mov_b32 s15, s19
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_6_3:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_6_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -20435,580 +17324,648 @@ define void @s_shuffle_v4p0_v4p0__7_7_6_3() {
; GFX942-NEXT: s_mov_b32 s9, s15
; GFX942-NEXT: s_mov_b32 s10, s14
; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b32 s14, s4
+; GFX942-NEXT: s_mov_b32 s15, s5
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__u_4_4_4() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__u_4_4_4:
+define void @s_shuffle_v4p0_v4p0__u_3_3_3() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__u_3_3_3:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: s_mov_b32 s14, s10
+; GFX9-NEXT: s_mov_b32 s15, s11
+; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:15]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 poison, i32 4, i32 4, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 poison, i32 3, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__0_4_4_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__0_4_4_4:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__0_4_4_4:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__0_4_4_4:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+define void @s_shuffle_v4p0_v4p0__0_3_3_3() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__0_3_3_3:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s14
+; GFX9-NEXT: s_mov_b32 s11, s15
+; GFX9-NEXT: s_mov_b32 s12, s14
+; GFX9-NEXT: s_mov_b32 s13, s15
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 0, i32 3, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__1_4_4_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__1_4_4_4:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__1_4_4_4:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__1_4_4_4:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+define void @s_shuffle_v4p0_v4p0__1_3_3_3() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__1_3_3_3:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s6
+; GFX9-NEXT: s_mov_b32 s9, s7
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: s_mov_b32 s14, s10
+; GFX9-NEXT: s_mov_b32 s15, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 1, i32 4, i32 4, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 1, i32 3, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__2_4_4_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__2_4_4_4:
+define void @s_shuffle_v4p0_v4p0__2_3_3_3() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__2_3_3_3:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: s_mov_b32 s14, s10
+; GFX9-NEXT: s_mov_b32 s15, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 2, i32 3, i32 3, i32 3>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__3_3_3_3() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__3_3_3_3:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: s_mov_b32 s14, s10
+; GFX9-NEXT: s_mov_b32 s15, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__4_3_3_3() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__4_3_3_3:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: s_mov_b32 s14, s10
+; GFX9-NEXT: s_mov_b32 s15, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 4, i32 3, i32 3, i32 3>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__5_3_3_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__5_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__2_4_4_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__5_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__2_4_4_4:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__5_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s12, s10
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b32 s8, s2
+; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b32 s14, s10
+; GFX942-NEXT: s_mov_b32 s15, s11
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 2, i32 4, i32 4, i32 4>
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 5, i32 3, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__3_4_4_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__3_4_4_4:
+define void @s_shuffle_v4p0_v4p0__6_3_3_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__6_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__3_4_4_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__6_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__3_4_4_4:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__6_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s12, s14
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s10, s14
+; GFX942-NEXT: s_mov_b32 s11, s15
+; GFX942-NEXT: s_mov_b32 s13, s15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 3, i32 4, i32 4, i32 4>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__4_4_4_4() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__4_4_4_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 4, i32 4, i32 4, i32 4>
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 6, i32 3, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__5_4_4_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__5_4_4_4:
+define void @s_shuffle_v4p0_v4p0__7_3_3_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__5_4_4_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__5_4_4_4:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s12, s10
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b32 s14, s10
+; GFX942-NEXT: s_mov_b32 s15, s11
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 5, i32 4, i32 4, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 3, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__6_4_4_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__6_4_4_4:
+define void @s_shuffle_v4p0_v4p0__7_u_3_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_u_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__6_4_4_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_u_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__6_4_4_4:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_u_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s12, s14
+; GFX942-NEXT: s_mov_b32 s13, s15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 6, i32 4, i32 4, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 poison, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_4_4_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_4_4_4:
+define void @s_shuffle_v4p0_v4p0__7_0_3_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_0_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s18
+; GFX900-NEXT: s_mov_b32 s13, s19
+; GFX900-NEXT: s_mov_b32 s14, s18
+; GFX900-NEXT: s_mov_b32 s15, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_4_4_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_0_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s18
+; GFX90A-NEXT: s_mov_b32 s13, s19
+; GFX90A-NEXT: s_mov_b32 s14, s18
+; GFX90A-NEXT: s_mov_b32 s15, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_4_4_4:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_0_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s8, s14
+; GFX942-NEXT: s_mov_b32 s9, s15
; GFX942-NEXT: s_mov_b32 s10, s0
; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b32 s12, s6
+; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b32 s14, s6
+; GFX942-NEXT: s_mov_b32 s15, s7
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 4, i32 4, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 0, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_u_4_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_u_4_4:
+define void @s_shuffle_v4p0_v4p0__7_1_3_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_1_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_u_4_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_1_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_u_4_4:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_1_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_mov_b32 s8, s6
; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b32 s12, s14
+; GFX942-NEXT: s_mov_b32 s13, s15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 poison, i32 4, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 1, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_0_4_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_0_4_4:
+define void @s_shuffle_v4p0_v4p0__7_2_3_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_2_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_0_4_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_2_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_0_4_4:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_2_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s10, s12
+; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b32 s12, s14
+; GFX942-NEXT: s_mov_b32 s13, s15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 0, i32 4, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 2, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_1_4_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_1_4_4:
+define void @s_shuffle_v4p0_v4p0__7_4_3_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_4_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_1_4_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_4_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_1_4_4:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_4_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -21019,779 +17976,738 @@ define void @s_shuffle_v4p0_v4p0__7_1_4_4() {
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_mov_b32 s8, s6
; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b32 s12, s14
+; GFX942-NEXT: s_mov_b32 s13, s15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 1, i32 4, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 4, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_2_4_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_2_4_4:
+define void @s_shuffle_v4p0_v4p0__7_5_3_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_5_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s18
+; GFX900-NEXT: s_mov_b32 s13, s19
+; GFX900-NEXT: s_mov_b32 s14, s18
+; GFX900-NEXT: s_mov_b32 s15, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_2_4_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_5_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s18
+; GFX90A-NEXT: s_mov_b32 s13, s19
+; GFX90A-NEXT: s_mov_b32 s14, s18
+; GFX90A-NEXT: s_mov_b32 s15, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_2_4_4:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_5_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[16:23]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s22
-; GFX942-NEXT: s_mov_b32 s9, s23
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s16
-; GFX942-NEXT: s_mov_b32 s13, s17
-; GFX942-NEXT: s_mov_b32 s14, s16
-; GFX942-NEXT: s_mov_b32 s15, s17
+; GFX942-NEXT: s_mov_b32 s8, s14
+; GFX942-NEXT: s_mov_b32 s9, s15
+; GFX942-NEXT: s_mov_b32 s12, s6
+; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b32 s14, s6
+; GFX942-NEXT: s_mov_b32 s15, s7
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 2, i32 4, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 5, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_3_4_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_3_4_4:
+define void @s_shuffle_v4p0_v4p0__7_6_3_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_6_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[16:23]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s22
; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s10, s20
+; GFX900-NEXT: s_mov_b32 s11, s21
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_3_4_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_6_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[16:23]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s22
; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s10, s20
+; GFX90A-NEXT: s_mov_b32 s11, s21
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_3_4_4:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_6_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[16:23]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s22
-; GFX942-NEXT: s_mov_b32 s9, s23
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s16
-; GFX942-NEXT: s_mov_b32 s13, s17
-; GFX942-NEXT: s_mov_b32 s14, s16
-; GFX942-NEXT: s_mov_b32 s15, s17
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s10, s4
+; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b32 s12, s14
+; GFX942-NEXT: s_mov_b32 s13, s15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 3, i32 4, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 6, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_5_4_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_5_4_4:
+define void @s_shuffle_v4p0_v4p0__7_7_3_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s6
-; GFX900-NEXT: s_mov_b32 s11, s7
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_5_4_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s6
-; GFX90A-NEXT: s_mov_b32 s11, s7
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_5_4_4:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b32 s12, s14
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s8, s10
+; GFX942-NEXT: s_mov_b32 s9, s11
+; GFX942-NEXT: s_mov_b32 s13, s15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 5, i32 4, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_6_4_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_6_4_4:
+define void @s_shuffle_v4p0_v4p0__7_7_u_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_u_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s10, s20
-; GFX900-NEXT: s_mov_b32 s11, s21
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_6_4_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_u_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s10, s20
-; GFX90A-NEXT: s_mov_b32 s11, s21
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_6_4_4:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_u_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s8, s10
+; GFX942-NEXT: s_mov_b32 s9, s11
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 6, i32 4, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_4_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_4_4:
+define void @s_shuffle_v4p0_v4p0__7_7_0_3() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_7_0_3:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s14, s18
+; GFX9-NEXT: s_mov_b32 s15, s19
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 3>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_7_1_3() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_7_1_3:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s12, s14
+; GFX9-NEXT: s_mov_b32 s13, s15
+; GFX9-NEXT: s_mov_b32 s14, s18
+; GFX9-NEXT: s_mov_b32 s15, s19
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 3>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_7_2_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_2_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_4_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_2_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_4_4:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_2_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s8, s10
+; GFX942-NEXT: s_mov_b32 s9, s11
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_u_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_u_4:
+define void @s_shuffle_v4p0_v4p0__7_7_4_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_4_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_u_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_4_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_u_4:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_4_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s8, s10
+; GFX942-NEXT: s_mov_b32 s9, s11
+; GFX942-NEXT: s_mov_b32 s12, s4
+; GFX942-NEXT: s_mov_b32 s13, s5
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_0_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_0_4:
+define void @s_shuffle_v4p0_v4p0__7_7_5_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_5_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s10, s22
-; GFX900-NEXT: s_mov_b32 s11, s23
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_0_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_5_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s10, s22
-; GFX90A-NEXT: s_mov_b32 s11, s23
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_0_4:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_5_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_mov_b32 s8, s10
; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b32 s12, s6
+; GFX942-NEXT: s_mov_b32 s13, s7
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_1_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_1_4:
+define void @s_shuffle_v4p0_v4p0__7_7_6_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_6_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s10, s22
-; GFX900-NEXT: s_mov_b32 s11, s23
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s14, s18
+; GFX900-NEXT: s_mov_b32 s15, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_1_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_6_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s10, s22
-; GFX90A-NEXT: s_mov_b32 s11, s23
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s14, s18
+; GFX90A-NEXT: s_mov_b32 s15, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_1_4:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_6_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b32 s8, s14
+; GFX942-NEXT: s_mov_b32 s9, s15
+; GFX942-NEXT: s_mov_b32 s10, s14
+; GFX942-NEXT: s_mov_b32 s11, s15
+; GFX942-NEXT: s_mov_b32 s14, s6
+; GFX942-NEXT: s_mov_b32 s15, s7
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 4>
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 3>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__u_4_4_4() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__u_4_4_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 poison, i32 4, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_2_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_2_4:
+define void @s_shuffle_v4p0_v4p0__0_4_4_4() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__0_4_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_2_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__0_4_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_2_4:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__0_4_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_3_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_3_4:
+define void @s_shuffle_v4p0_v4p0__1_4_4_4() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__1_4_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_3_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__1_4_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_3_4:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__1_4_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[16:23]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s22
-; GFX942-NEXT: s_mov_b32 s9, s23
-; GFX942-NEXT: s_mov_b32 s10, s22
-; GFX942-NEXT: s_mov_b32 s11, s23
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s16
-; GFX942-NEXT: s_mov_b32 s15, s17
+; GFX942-NEXT: s_mov_b32 s8, s2
+; GFX942-NEXT: s_mov_b32 s9, s3
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 1, i32 4, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_5_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_5_4:
+define void @s_shuffle_v4p0_v4p0__2_4_4_4() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__2_4_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_5_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__2_4_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_5_4:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__2_4_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 2, i32 4, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_6_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_6_4:
+define void @s_shuffle_v4p0_v4p0__3_4_4_4() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__3_4_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s10, s22
-; GFX900-NEXT: s_mov_b32 s11, s23
-; GFX900-NEXT: s_mov_b32 s12, s20
-; GFX900-NEXT: s_mov_b32 s13, s21
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_6_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__3_4_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s10, s22
-; GFX90A-NEXT: s_mov_b32 s11, s23
-; GFX90A-NEXT: s_mov_b32 s12, s20
-; GFX90A-NEXT: s_mov_b32 s13, s21
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_6_4:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__3_4_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -21799,533 +18715,772 @@ define void @s_shuffle_v4p0_v4p0__7_7_6_4() {
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_mov_b32 s8, s6
; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 3, i32 4, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__4_4_4_4() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__4_4_4_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 4, i32 4, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__5_4_4_4() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__5_4_4_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s14
+; GFX9-NEXT: s_mov_b32 s9, s15
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: s_mov_b32 s14, s12
+; GFX9-NEXT: s_mov_b32 s15, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 5, i32 4, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__u_5_5_5() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__u_5_5_5:
+define void @s_shuffle_v4p0_v4p0__6_4_4_4() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__6_4_4_4:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ; def s[4:11]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
+; GFX9-NEXT: s_mov_b32 s10, s4
+; GFX9-NEXT: s_mov_b32 s11, s5
+; GFX9-NEXT: s_mov_b32 s12, s4
+; GFX9-NEXT: s_mov_b32 s13, s5
+; GFX9-NEXT: s_mov_b32 s14, s4
+; GFX9-NEXT: s_mov_b32 s15, s5
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:15]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 poison, i32 5, i32 5, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 6, i32 4, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__0_5_5_5() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__0_5_5_5:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__0_5_5_5:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__0_5_5_5:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+define void @s_shuffle_v4p0_v4p0__7_4_4_4() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_4_4_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s18
+; GFX9-NEXT: s_mov_b32 s9, s19
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: s_mov_b32 s14, s12
+; GFX9-NEXT: s_mov_b32 s15, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 0, i32 5, i32 5, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 4, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__1_5_5_5() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__1_5_5_5:
+define void @s_shuffle_v4p0_v4p0__7_u_4_4() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_u_4_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s18
+; GFX9-NEXT: s_mov_b32 s9, s19
+; GFX9-NEXT: s_mov_b32 s14, s12
+; GFX9-NEXT: s_mov_b32 s15, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 poison, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_0_4_4() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_0_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s12, s10
-; GFX900-NEXT: s_mov_b32 s13, s11
-; GFX900-NEXT: s_mov_b32 s14, s10
-; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__1_5_5_5:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_0_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s12, s10
-; GFX90A-NEXT: s_mov_b32 s13, s11
-; GFX90A-NEXT: s_mov_b32 s14, s10
-; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__1_5_5_5:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_0_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
+; GFX942-NEXT: s_mov_b32 s8, s18
+; GFX942-NEXT: s_mov_b32 s9, s19
+; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b32 s14, s12
+; GFX942-NEXT: s_mov_b32 s15, s13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 1, i32 5, i32 5, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 0, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__2_5_5_5() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__2_5_5_5:
+define void @s_shuffle_v4p0_v4p0__7_1_4_4() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_1_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s12, s10
-; GFX900-NEXT: s_mov_b32 s13, s11
-; GFX900-NEXT: s_mov_b32 s14, s10
-; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__2_5_5_5:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_1_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s12, s10
-; GFX90A-NEXT: s_mov_b32 s13, s11
-; GFX90A-NEXT: s_mov_b32 s14, s10
-; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__2_5_5_5:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_1_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
+; GFX942-NEXT: s_mov_b32 s8, s18
+; GFX942-NEXT: s_mov_b32 s9, s19
+; GFX942-NEXT: s_mov_b32 s14, s12
+; GFX942-NEXT: s_mov_b32 s15, s13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 2, i32 5, i32 5, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 1, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__3_5_5_5() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__3_5_5_5:
+define void @s_shuffle_v4p0_v4p0__7_2_4_4() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_2_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[16:23]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s18
; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s12, s10
-; GFX900-NEXT: s_mov_b32 s13, s11
-; GFX900-NEXT: s_mov_b32 s14, s10
-; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: s_mov_b32 s10, s20
+; GFX900-NEXT: s_mov_b32 s11, s21
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__3_5_5_5:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_2_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[16:23]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s18
; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s12, s10
-; GFX90A-NEXT: s_mov_b32 s13, s11
-; GFX90A-NEXT: s_mov_b32 s14, s10
-; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: s_mov_b32 s10, s20
+; GFX90A-NEXT: s_mov_b32 s11, s21
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__3_5_5_5:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_2_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
+; GFX942-NEXT: s_mov_b32 s8, s18
+; GFX942-NEXT: s_mov_b32 s9, s19
+; GFX942-NEXT: s_mov_b32 s10, s4
+; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b32 s14, s12
+; GFX942-NEXT: s_mov_b32 s15, s13
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 3, i32 5, i32 5, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 2, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__4_5_5_5() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__4_5_5_5:
+define void @s_shuffle_v4p0_v4p0__7_3_4_4() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_3_4_4:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ; def s[4:11]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s18
+; GFX9-NEXT: s_mov_b32 s9, s19
+; GFX9-NEXT: s_mov_b32 s14, s12
+; GFX9-NEXT: s_mov_b32 s15, s13
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:15]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 4, i32 5, i32 5, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 3, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__5_5_5_5() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__5_5_5_5:
+define void @s_shuffle_v4p0_v4p0__7_5_4_4() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_5_4_4:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s18
+; GFX9-NEXT: s_mov_b32 s9, s19
+; GFX9-NEXT: s_mov_b32 s10, s14
+; GFX9-NEXT: s_mov_b32 s11, s15
+; GFX9-NEXT: s_mov_b32 s14, s12
+; GFX9-NEXT: s_mov_b32 s15, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 5, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_6_4_4() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_6_4_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s18
+; GFX9-NEXT: s_mov_b32 s9, s19
+; GFX9-NEXT: s_mov_b32 s10, s16
+; GFX9-NEXT: s_mov_b32 s11, s17
+; GFX9-NEXT: s_mov_b32 s14, s12
+; GFX9-NEXT: s_mov_b32 s15, s13
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 6, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_7_4_4() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_7_4_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_mov_b32 s8, s10
; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
+; GFX9-NEXT: s_mov_b32 s12, s4
+; GFX9-NEXT: s_mov_b32 s13, s5
+; GFX9-NEXT: s_mov_b32 s14, s4
+; GFX9-NEXT: s_mov_b32 s15, s5
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:15]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__6_5_5_5() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__6_5_5_5:
+define void @s_shuffle_v4p0_v4p0__7_7_u_4() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_7_u_4:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ; def s[4:11]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s14, s4
+; GFX9-NEXT: s_mov_b32 s15, s5
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:15]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 6, i32 5, i32 5, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_5_5_5() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_5_5_5:
+define void @s_shuffle_v4p0_v4p0__7_7_0_4() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_7_0_4:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ; def s[12:19]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s14, s4
+; GFX9-NEXT: s_mov_b32 s15, s5
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:15]
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 5, i32 5, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_u_5_5() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_u_5_5:
+define void @s_shuffle_v4p0_v4p0__7_7_1_4() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_1_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s14, s4
+; GFX900-NEXT: s_mov_b32 s15, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_u_5_5:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_1_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s14, s4
+; GFX90A-NEXT: s_mov_b32 s15, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_u_5_5:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_1_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s8, s10
+; GFX942-NEXT: s_mov_b32 s9, s11
; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b32 s14, s4
+; GFX942-NEXT: s_mov_b32 s15, s5
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 poison, i32 5, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_0_5_5() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_0_5_5:
+define void @s_shuffle_v4p0_v4p0__7_7_2_4() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_2_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s14, s4
+; GFX900-NEXT: s_mov_b32 s15, s5
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_2_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s14, s4
+; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_2_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s8, s10
+; GFX942-NEXT: s_mov_b32 s9, s11
+; GFX942-NEXT: s_mov_b32 s14, s4
+; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_7_3_4() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_3_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
; GFX900-NEXT: s_mov_b32 s12, s14
; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s14, s4
+; GFX900-NEXT: s_mov_b32 s15, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_0_5_5:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_3_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
; GFX90A-NEXT: s_mov_b32 s12, s14
; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s14, s4
+; GFX90A-NEXT: s_mov_b32 s15, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_0_5_5:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_3_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: s_mov_b32 s12, s14
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_mov_b32 s8, s10
; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b32 s14, s4
+; GFX942-NEXT: s_mov_b32 s15, s5
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 0, i32 5, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_7_5_4() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_7_5_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s12, s6
+; GFX9-NEXT: s_mov_b32 s13, s7
+; GFX9-NEXT: s_mov_b32 s14, s4
+; GFX9-NEXT: s_mov_b32 s15, s5
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_7_6_4() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_6_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s10, s22
+; GFX900-NEXT: s_mov_b32 s11, s23
+; GFX900-NEXT: s_mov_b32 s12, s20
+; GFX900-NEXT: s_mov_b32 s13, s21
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_6_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s10, s22
+; GFX90A-NEXT: s_mov_b32 s11, s23
+; GFX90A-NEXT: s_mov_b32 s12, s20
+; GFX90A-NEXT: s_mov_b32 s13, s21
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_6_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s10, s6
+; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b32 s12, s4
+; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b32 s14, s0
+; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__u_5_5_5() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__u_5_5_5:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: s_mov_b32 s14, s10
+; GFX9-NEXT: s_mov_b32 s15, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 poison, i32 5, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_1_5_5() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_1_5_5:
+define void @s_shuffle_v4p0_v4p0__0_5_5_5() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__0_5_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -22334,8 +19489,8 @@ define void @s_shuffle_v4p0_v4p0__7_1_5_5() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
; GFX900-NEXT: s_mov_b32 s12, s14
; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
@@ -22343,7 +19498,7 @@ define void @s_shuffle_v4p0_v4p0__7_1_5_5() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_1_5_5:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__0_5_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -22352,8 +19507,8 @@ define void @s_shuffle_v4p0_v4p0__7_1_5_5() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
; GFX90A-NEXT: s_mov_b32 s12, s14
; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
@@ -22361,241 +19516,385 @@ define void @s_shuffle_v4p0_v4p0__7_1_5_5() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_1_5_5:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__0_5_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b32 s10, s14
+; GFX942-NEXT: s_mov_b32 s11, s15
+; GFX942-NEXT: s_mov_b32 s12, s14
+; GFX942-NEXT: s_mov_b32 s13, s15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 1, i32 5, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 0, i32 5, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_2_5_5() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_2_5_5:
+define void @s_shuffle_v4p0_v4p0__1_5_5_5() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__1_5_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_2_5_5:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__1_5_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_2_5_5:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__1_5_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b32 s8, s2
+; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b32 s12, s10
+; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b32 s14, s10
+; GFX942-NEXT: s_mov_b32 s15, s11
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 2, i32 5, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 1, i32 5, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_3_5_5() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_3_5_5:
+define void @s_shuffle_v4p0_v4p0__2_5_5_5() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__2_5_5_5:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s14
+; GFX9-NEXT: s_mov_b32 s11, s15
+; GFX9-NEXT: s_mov_b32 s12, s14
+; GFX9-NEXT: s_mov_b32 s13, s15
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 2, i32 5, i32 5, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__3_5_5_5() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__3_5_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s18
; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_3_5_5:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__3_5_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s18
; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_3_5_5:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__3_5_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s12, s10
+; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b32 s14, s10
+; GFX942-NEXT: s_mov_b32 s15, s11
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 3, i32 5, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 3, i32 5, i32 5, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__4_5_5_5() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__4_5_5_5:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: s_mov_b32 s14, s10
+; GFX9-NEXT: s_mov_b32 s15, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 4, i32 5, i32 5, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__5_5_5_5() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__5_5_5_5:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: s_mov_b32 s14, s10
+; GFX9-NEXT: s_mov_b32 s15, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__6_5_5_5() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__6_5_5_5:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s10, s6
+; GFX9-NEXT: s_mov_b32 s11, s7
+; GFX9-NEXT: s_mov_b32 s12, s6
+; GFX9-NEXT: s_mov_b32 s13, s7
+; GFX9-NEXT: s_mov_b32 s14, s6
+; GFX9-NEXT: s_mov_b32 s15, s7
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 6, i32 5, i32 5, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_5_5_5() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_5_5_5:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s14
+; GFX9-NEXT: s_mov_b32 s9, s15
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: s_mov_b32 s14, s10
+; GFX9-NEXT: s_mov_b32 s15, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 5, i32 5, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_u_5_5() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_u_5_5:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s18
+; GFX9-NEXT: s_mov_b32 s9, s19
+; GFX9-NEXT: s_mov_b32 s12, s14
+; GFX9-NEXT: s_mov_b32 s13, s15
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 poison, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_4_5_5() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_4_5_5:
+define void @s_shuffle_v4p0_v4p0__7_0_5_5() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_0_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
; GFX900-NEXT: s_mov_b32 s10, s4
; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_4_5_5:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_0_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
; GFX90A-NEXT: s_mov_b32 s10, s4
; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_4_5_5:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_0_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s8, s18
+; GFX942-NEXT: s_mov_b32 s9, s19
; GFX942-NEXT: s_mov_b32 s10, s0
; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b32 s12, s14
+; GFX942-NEXT: s_mov_b32 s13, s15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 4, i32 5, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 0, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_6_5_5() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_6_5_5:
+define void @s_shuffle_v4p0_v4p0__7_1_5_5() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_1_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s18
; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
; GFX900-NEXT: s_mov_b32 s12, s14
; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
@@ -22603,16 +19902,17 @@ define void @s_shuffle_v4p0_v4p0__7_6_5_5() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_6_5_5:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_1_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s18
; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
; GFX90A-NEXT: s_mov_b32 s12, s14
; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
@@ -22620,138 +19920,210 @@ define void @s_shuffle_v4p0_v4p0__7_6_5_5() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_6_5_5:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_1_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s8, s18
+; GFX942-NEXT: s_mov_b32 s9, s19
+; GFX942-NEXT: s_mov_b32 s12, s14
+; GFX942-NEXT: s_mov_b32 s13, s15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 6, i32 5, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 1, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_5_5() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_5_5:
+define void @s_shuffle_v4p0_v4p0__7_2_5_5() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_2_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[16:23]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s10, s20
+; GFX900-NEXT: s_mov_b32 s11, s21
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_5_5:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_2_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[16:23]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s10, s20
+; GFX90A-NEXT: s_mov_b32 s11, s21
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_5_5:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_2_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b32 s8, s18
+; GFX942-NEXT: s_mov_b32 s9, s19
+; GFX942-NEXT: s_mov_b32 s10, s4
+; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b32 s12, s14
+; GFX942-NEXT: s_mov_b32 s13, s15
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 2, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_u_5() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_u_5:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_u_5:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_u_5:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+define void @s_shuffle_v4p0_v4p0__7_3_5_5() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_3_5_5:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s18
+; GFX9-NEXT: s_mov_b32 s9, s19
+; GFX9-NEXT: s_mov_b32 s12, s14
+; GFX9-NEXT: s_mov_b32 s13, s15
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 3, i32 5, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_4_5_5() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_4_5_5:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s18
+; GFX9-NEXT: s_mov_b32 s9, s19
+; GFX9-NEXT: s_mov_b32 s10, s12
+; GFX9-NEXT: s_mov_b32 s11, s13
+; GFX9-NEXT: s_mov_b32 s12, s14
+; GFX9-NEXT: s_mov_b32 s13, s15
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 4, i32 5, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_6_5_5() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_6_5_5:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s18
+; GFX9-NEXT: s_mov_b32 s9, s19
+; GFX9-NEXT: s_mov_b32 s10, s16
+; GFX9-NEXT: s_mov_b32 s11, s17
+; GFX9-NEXT: s_mov_b32 s12, s14
+; GFX9-NEXT: s_mov_b32 s13, s15
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 6, i32 5, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_7_5_5() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_7_5_5:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s12, s6
+; GFX9-NEXT: s_mov_b32 s13, s7
+; GFX9-NEXT: s_mov_b32 s14, s6
+; GFX9-NEXT: s_mov_b32 s15, s7
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_7_u_5() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_7_u_5:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s14, s6
+; GFX9-NEXT: s_mov_b32 s15, s7
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 5>
@@ -22760,65 +20132,23 @@ define void @s_shuffle_v4p0_v4p0__7_7_u_5() {
}
define void @s_shuffle_v4p0_v4p0__7_7_0_5() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_0_5:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s18
-; GFX900-NEXT: s_mov_b32 s11, s19
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_0_5:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s18
-; GFX90A-NEXT: s_mov_b32 s11, s19
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_0_5:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_7_0_5:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s14, s6
+; GFX9-NEXT: s_mov_b32 s15, s7
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 5>
@@ -22831,17 +20161,17 @@ define void @s_shuffle_v4p0_v4p0__7_7_1_5() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s18
-; GFX900-NEXT: s_mov_b32 s11, s19
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s14, s6
+; GFX900-NEXT: s_mov_b32 s15, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -22851,17 +20181,17 @@ define void @s_shuffle_v4p0_v4p0__7_7_1_5() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s18
-; GFX90A-NEXT: s_mov_b32 s11, s19
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s14, s6
+; GFX90A-NEXT: s_mov_b32 s15, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -22936,15 +20266,14 @@ define void @s_shuffle_v4p0_v4p0__7_7_2_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b32 s8, s10
+; GFX942-NEXT: s_mov_b32 s9, s11
+; GFX942-NEXT: s_mov_b32 s14, s6
+; GFX942-NEXT: s_mov_b32 s15, s7
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -23001,17 +20330,17 @@ define void @s_shuffle_v4p0_v4p0__7_7_3_5() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s12, s14
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s18
-; GFX942-NEXT: s_mov_b32 s11, s19
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b32 s8, s10
+; GFX942-NEXT: s_mov_b32 s9, s11
+; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b32 s14, s6
+; GFX942-NEXT: s_mov_b32 s15, s7
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -23024,118 +20353,46 @@ define void @s_shuffle_v4p0_v4p0__7_7_3_5() {
}
define void @s_shuffle_v4p0_v4p0__7_7_4_5() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_4_5:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_4_5:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_4_5:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 5>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__7_7_6_5() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_6_5:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s18
-; GFX900-NEXT: s_mov_b32 s11, s19
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_6_5:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s18
-; GFX90A-NEXT: s_mov_b32 s11, s19
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_6_5:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_7_4_5:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s12, s4
+; GFX9-NEXT: s_mov_b32 s13, s5
+; GFX9-NEXT: s_mov_b32 s14, s6
+; GFX9-NEXT: s_mov_b32 s15, s7
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_7_6_5() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_7_6_5:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s18
+; GFX9-NEXT: s_mov_b32 s9, s19
+; GFX9-NEXT: s_mov_b32 s10, s18
+; GFX9-NEXT: s_mov_b32 s11, s19
+; GFX9-NEXT: s_mov_b32 s12, s16
+; GFX9-NEXT: s_mov_b32 s13, s17
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 5>
@@ -23304,17 +20561,17 @@ define void @s_shuffle_v4p0_v4p0__2_6_6_6() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s14, s12
-; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -23324,17 +20581,17 @@ define void @s_shuffle_v4p0_v4p0__2_6_6_6() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s14, s12
-; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -23344,17 +20601,18 @@ define void @s_shuffle_v4p0_v4p0__2_6_6_6() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b32 s10, s4
+; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b32 s12, s4
+; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b32 s14, s4
+; GFX942-NEXT: s_mov_b32 s15, s5
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -23484,14 +20742,14 @@ define void @s_shuffle_v4p0_v4p0__6_6_6_6() {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ; def s[4:11]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
+; GFX9-NEXT: s_mov_b32 s10, s8
+; GFX9-NEXT: s_mov_b32 s11, s9
+; GFX9-NEXT: s_mov_b32 s12, s8
+; GFX9-NEXT: s_mov_b32 s13, s9
+; GFX9-NEXT: s_mov_b32 s14, s8
+; GFX9-NEXT: s_mov_b32 s15, s9
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:15]
; GFX9-NEXT: ;;#ASMEND
@@ -23755,17 +21013,17 @@ define void @s_shuffle_v4p0_v4p0__7_3_6_6() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s18
-; GFX900-NEXT: s_mov_b32 s11, s19
-; GFX900-NEXT: s_mov_b32 s14, s12
-; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -23775,17 +21033,17 @@ define void @s_shuffle_v4p0_v4p0__7_3_6_6() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s18
-; GFX90A-NEXT: s_mov_b32 s11, s19
-; GFX90A-NEXT: s_mov_b32 s14, s12
-; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -23795,17 +21053,18 @@ define void @s_shuffle_v4p0_v4p0__7_3_6_6() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s12, s4
+; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b32 s14, s4
+; GFX942-NEXT: s_mov_b32 s15, s5
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -23990,19 +21249,17 @@ define void @s_shuffle_v4p0_v4p0__7_7_0_6() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[16:23]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s18
-; GFX900-NEXT: s_mov_b32 s11, s19
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s10, s22
+; GFX900-NEXT: s_mov_b32 s11, s23
+; GFX900-NEXT: s_mov_b32 s14, s20
+; GFX900-NEXT: s_mov_b32 s15, s21
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -24012,19 +21269,17 @@ define void @s_shuffle_v4p0_v4p0__7_7_0_6() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[16:23]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s18
-; GFX90A-NEXT: s_mov_b32 s11, s19
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s10, s22
+; GFX90A-NEXT: s_mov_b32 s11, s23
+; GFX90A-NEXT: s_mov_b32 s14, s20
+; GFX90A-NEXT: s_mov_b32 s15, s21
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -24039,14 +21294,12 @@ define void @s_shuffle_v4p0_v4p0__7_7_0_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s18
-; GFX942-NEXT: s_mov_b32 s11, s19
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s16
-; GFX942-NEXT: s_mov_b32 s15, s17
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s10, s6
+; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b32 s14, s4
+; GFX942-NEXT: s_mov_b32 s15, s5
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -24272,58 +21525,22 @@ define void @s_shuffle_v4p0_v4p0__7_7_3_6() {
}
define void @s_shuffle_v4p0_v4p0__7_7_4_6() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_4_6:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s18
-; GFX900-NEXT: s_mov_b32 s11, s19
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_4_6:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s18
-; GFX90A-NEXT: s_mov_b32 s11, s19
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_4_6:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_7_4_6:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s18
+; GFX9-NEXT: s_mov_b32 s9, s19
+; GFX9-NEXT: s_mov_b32 s10, s18
+; GFX9-NEXT: s_mov_b32 s11, s19
+; GFX9-NEXT: s_mov_b32 s14, s16
+; GFX9-NEXT: s_mov_b32 s15, s17
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 6>
@@ -24400,12 +21617,12 @@ define void @s_shuffle_v4p0_v4p0__u_7_7_7() {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ; def s[4:11]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: s_mov_b32 s14, s10
+; GFX9-NEXT: s_mov_b32 s15, s11
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:15]
; GFX9-NEXT: ;;#ASMEND
@@ -24489,17 +21706,17 @@ define void @s_shuffle_v4p0_v4p0__1_7_7_7() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -24509,17 +21726,17 @@ define void @s_shuffle_v4p0_v4p0__1_7_7_7() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -24529,17 +21746,18 @@ define void @s_shuffle_v4p0_v4p0__1_7_7_7() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_mov_b32 s8, s2
; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b32 s12, s10
+; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b32 s14, s10
+; GFX942-NEXT: s_mov_b32 s15, s11
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -24556,17 +21774,17 @@ define void @s_shuffle_v4p0_v4p0__2_7_7_7() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s10, s18
+; GFX900-NEXT: s_mov_b32 s11, s19
+; GFX900-NEXT: s_mov_b32 s12, s18
+; GFX900-NEXT: s_mov_b32 s13, s19
+; GFX900-NEXT: s_mov_b32 s14, s18
+; GFX900-NEXT: s_mov_b32 s15, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -24576,17 +21794,17 @@ define void @s_shuffle_v4p0_v4p0__2_7_7_7() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s10, s18
+; GFX90A-NEXT: s_mov_b32 s11, s19
+; GFX90A-NEXT: s_mov_b32 s12, s18
+; GFX90A-NEXT: s_mov_b32 s13, s19
+; GFX90A-NEXT: s_mov_b32 s14, s18
+; GFX90A-NEXT: s_mov_b32 s15, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -24596,17 +21814,18 @@ define void @s_shuffle_v4p0_v4p0__2_7_7_7() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b32 s10, s6
+; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b32 s12, s6
+; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b32 s14, s6
+; GFX942-NEXT: s_mov_b32 s15, s7
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -24623,17 +21842,17 @@ define void @s_shuffle_v4p0_v4p0__3_7_7_7() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -24643,17 +21862,17 @@ define void @s_shuffle_v4p0_v4p0__3_7_7_7() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -24665,15 +21884,16 @@ define void @s_shuffle_v4p0_v4p0__3_7_7_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b32 s8, s14
+; GFX942-NEXT: s_mov_b32 s9, s15
+; GFX942-NEXT: s_mov_b32 s12, s10
+; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b32 s14, s10
+; GFX942-NEXT: s_mov_b32 s15, s11
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -24712,14 +21932,14 @@ define void @s_shuffle_v4p0_v4p0__5_7_7_7() {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ; def s[4:11]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
+; GFX9-NEXT: s_mov_b32 s8, s6
+; GFX9-NEXT: s_mov_b32 s9, s7
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: s_mov_b32 s14, s10
+; GFX9-NEXT: s_mov_b32 s15, s11
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:15]
; GFX9-NEXT: ;;#ASMEND
@@ -24736,14 +21956,12 @@ define void @s_shuffle_v4p0_v4p0__6_7_7_7() {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ; def s[4:11]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
+; GFX9-NEXT: s_mov_b32 s12, s10
+; GFX9-NEXT: s_mov_b32 s13, s11
+; GFX9-NEXT: s_mov_b32 s14, s10
+; GFX9-NEXT: s_mov_b32 s15, s11
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:15]
; GFX9-NEXT: ;;#ASMEND
@@ -24983,17 +22201,17 @@ define void @s_shuffle_v4p0_v4p0__7_3_7_7() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s18
-; GFX900-NEXT: s_mov_b32 s11, s19
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s12, s18
+; GFX900-NEXT: s_mov_b32 s13, s19
+; GFX900-NEXT: s_mov_b32 s14, s18
+; GFX900-NEXT: s_mov_b32 s15, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -25003,17 +22221,17 @@ define void @s_shuffle_v4p0_v4p0__7_3_7_7() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s18
-; GFX90A-NEXT: s_mov_b32 s11, s19
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s12, s18
+; GFX90A-NEXT: s_mov_b32 s13, s19
+; GFX90A-NEXT: s_mov_b32 s14, s18
+; GFX90A-NEXT: s_mov_b32 s15, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -25023,17 +22241,18 @@ define void @s_shuffle_v4p0_v4p0__7_3_7_7() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b32 s8, s6
+; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b32 s12, s6
+; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b32 s14, s6
+; GFX942-NEXT: s_mov_b32 s15, s7
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -25160,12 +22379,12 @@ define void @s_shuffle_v4p0_v4p0__7_7_u_7() {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
+; GFX9-NEXT: ; def s[4:11]
; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s14, s10
+; GFX9-NEXT: s_mov_b32 s15, s11
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s[8:15]
; GFX9-NEXT: ;;#ASMEND
@@ -25178,65 +22397,23 @@ define void @s_shuffle_v4p0_v4p0__7_7_u_7() {
}
define void @s_shuffle_v4p0_v4p0__7_7_0_7() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_0_7:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_0_7:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_0_7:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_7_0_7:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[12:19]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s14, s10
+; GFX9-NEXT: s_mov_b32 s15, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 7>
@@ -25249,17 +22426,17 @@ define void @s_shuffle_v4p0_v4p0__7_7_1_7() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -25269,17 +22446,17 @@ define void @s_shuffle_v4p0_v4p0__7_7_1_7() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -25289,17 +22466,17 @@ define void @s_shuffle_v4p0_v4p0__7_7_1_7() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s8, s10
+; GFX942-NEXT: s_mov_b32 s9, s11
; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b32 s14, s10
+; GFX942-NEXT: s_mov_b32 s15, s11
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -25319,14 +22496,12 @@ define void @s_shuffle_v4p0_v4p0__7_7_2_7() {
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s10, s22
-; GFX900-NEXT: s_mov_b32 s11, s23
-; GFX900-NEXT: s_mov_b32 s14, s22
-; GFX900-NEXT: s_mov_b32 s15, s23
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -25339,14 +22514,12 @@ define void @s_shuffle_v4p0_v4p0__7_7_2_7() {
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s10, s22
-; GFX90A-NEXT: s_mov_b32 s11, s23
-; GFX90A-NEXT: s_mov_b32 s14, s22
-; GFX90A-NEXT: s_mov_b32 s15, s23
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -25358,15 +22531,14 @@ define void @s_shuffle_v4p0_v4p0__7_7_2_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b32 s8, s10
+; GFX942-NEXT: s_mov_b32 s9, s11
+; GFX942-NEXT: s_mov_b32 s14, s10
+; GFX942-NEXT: s_mov_b32 s15, s11
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -25383,17 +22555,17 @@ define void @s_shuffle_v4p0_v4p0__7_7_3_7() {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s18
-; GFX900-NEXT: s_mov_b32 s13, s19
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
@@ -25403,17 +22575,17 @@ define void @s_shuffle_v4p0_v4p0__7_7_3_7() {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s18
-; GFX90A-NEXT: s_mov_b32 s13, s19
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
@@ -25425,15 +22597,15 @@ define void @s_shuffle_v4p0_v4p0__7_7_3_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b32 s12, s14
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b32 s8, s10
+; GFX942-NEXT: s_mov_b32 s9, s11
+; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b32 s14, s10
+; GFX942-NEXT: s_mov_b32 s15, s11
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -25446,58 +22618,22 @@ define void @s_shuffle_v4p0_v4p0__7_7_3_7() {
}
define void @s_shuffle_v4p0_v4p0__7_7_4_7() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_4_7:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s18
-; GFX900-NEXT: s_mov_b32 s11, s19
-; GFX900-NEXT: s_mov_b32 s14, s18
-; GFX900-NEXT: s_mov_b32 s15, s19
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_4_7:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s18
-; GFX90A-NEXT: s_mov_b32 s11, s19
-; GFX90A-NEXT: s_mov_b32 s14, s18
-; GFX90A-NEXT: s_mov_b32 s15, s19
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_4_7:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_7_4_7:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s12, s4
+; GFX9-NEXT: s_mov_b32 s13, s5
+; GFX9-NEXT: s_mov_b32 s14, s10
+; GFX9-NEXT: s_mov_b32 s15, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 7>
@@ -25506,62 +22642,22 @@ define void @s_shuffle_v4p0_v4p0__7_7_4_7() {
}
define void @s_shuffle_v4p0_v4p0__7_7_5_7() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_5_7:
-; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s18
-; GFX900-NEXT: s_mov_b32 s11, s19
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
-; GFX900-NEXT: s_mov_b32 s14, s18
-; GFX900-NEXT: s_mov_b32 s15, s19
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; use s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_5_7:
-; GFX90A: ; %bb.0:
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s18
-; GFX90A-NEXT: s_mov_b32 s11, s19
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
-; GFX90A-NEXT: s_mov_b32 s14, s18
-; GFX90A-NEXT: s_mov_b32 s15, s19
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; use s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_5_7:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; use s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_7_5_7:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; def s[4:11]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_mov_b32 s8, s10
+; GFX9-NEXT: s_mov_b32 s9, s11
+; GFX9-NEXT: s_mov_b32 s12, s6
+; GFX9-NEXT: s_mov_b32 s13, s7
+; GFX9-NEXT: s_mov_b32 s14, s10
+; GFX9-NEXT: s_mov_b32 s15, s11
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 7>
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4p3.v2p3.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4p3.v2p3.ll
index 90a1b99dc7c14..1cf5c6cd3f286 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4p3.v2p3.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4p3.v2p3.ll
@@ -58,12 +58,11 @@ define void @v_shuffle_v4p3_v2p3__1_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v2p3__1_u_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v2, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx4 v2, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v2, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -111,12 +110,11 @@ define void @v_shuffle_v4p3_v2p3__3_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v2p3__3_u_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v2, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx4 v2, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v2, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -154,15 +152,14 @@ define void @v_shuffle_v4p3_v2p3__3_0_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v2p3__3_0_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:2]
-; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[2:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:1]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx4 v4, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -207,15 +204,14 @@ define void @v_shuffle_v4p3_v2p3__3_1_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v2p3__3_1_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:1]
+; GFX900-NEXT: ; def v[1:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v3, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -262,10 +258,10 @@ define void @v_shuffle_v4p3_v2p3__3_2_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:2]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: global_store_dwordx4 v3, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -306,12 +302,12 @@ define void @v_shuffle_v4p3_v2p3__3_3_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v2p3__3_3_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx4 v2, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: global_store_dwordx4 v3, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -349,15 +345,15 @@ define void @v_shuffle_v4p3_v2p3__3_3_0_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v2p3__3_3_0_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[3:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -402,14 +398,14 @@ define void @v_shuffle_v4p3_v2p3__3_3_1_u(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:2]
+; GFX900-NEXT: ; def v[2:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: global_store_dwordx4 v4, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -457,11 +453,11 @@ define void @v_shuffle_v4p3_v2p3__3_3_2_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: global_store_dwordx4 v4, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -502,13 +498,13 @@ define void @v_shuffle_v4p3_v2p3__3_3_3_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v2p3__3_3_3_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: global_store_dwordx4 v4, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -548,16 +544,16 @@ define void @v_shuffle_v4p3_v2p3__3_3_3_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v2p3__3_3_3_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:4]
+; GFX900-NEXT: ; def v[4:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -604,15 +600,15 @@ define void @v_shuffle_v4p3_v2p3__3_3_3_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[3:4]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -660,12 +656,12 @@ define void @v_shuffle_v4p3_v2p3__3_3_3_2(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:4]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -705,14 +701,14 @@ define void @v_shuffle_v4p3_v2p3__3_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v2p3__3_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
; GFX900-NEXT: v_mov_b32_e32 v2, v1
; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -850,14 +846,14 @@ define void @v_shuffle_v4p3_v2p3__1_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v2p3__1_0_0_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:2]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -945,16 +941,15 @@ define void @v_shuffle_v4p3_v2p3__3_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:2]
+; GFX900-NEXT: ; def v[2:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1001,15 +996,14 @@ define void @v_shuffle_v4p3_v2p3__3_u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[3:4]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v2, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v2, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1056,16 +1050,15 @@ define void @v_shuffle_v4p3_v2p3__3_1_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[3:4]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v4
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1114,15 +1107,15 @@ define void @v_shuffle_v4p3_v2p3__3_2_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:2]
+; GFX900-NEXT: ; def v[3:4]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:4]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1169,15 +1162,15 @@ define void @v_shuffle_v4p3_v2p3__3_3_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[3:4]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1223,16 +1216,15 @@ define void @v_shuffle_v4p3_v2p3__3_3_u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v2p3__3_3_u_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:1]
+; GFX900-NEXT: ; def v[4:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v3, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1278,16 +1270,16 @@ define void @v_shuffle_v4p3_v2p3__3_3_1_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v2p3__3_3_1_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:1]
+; GFX900-NEXT: ; def v[4:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:4]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: v_mov_b32_e32 v3, v5
+; GFX900-NEXT: global_store_dwordx4 v6, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1333,17 +1325,16 @@ define void @v_shuffle_v4p3_v2p3__3_3_2_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v2p3__3_3_2_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[4:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1483,14 +1474,14 @@ define void @v_shuffle_v4p3_v2p3__1_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v2p3__1_1_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
; GFX900-NEXT: v_mov_b32_e32 v2, v1
; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1577,17 +1568,16 @@ define void @v_shuffle_v4p3_v2p3__3_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v2p3__3_1_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:1]
+; GFX900-NEXT: ; def v[1:2]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1635,16 +1625,15 @@ define void @v_shuffle_v4p3_v2p3__3_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v2p3__3_u_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:2]
+; GFX900-NEXT: ; def v[2:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1690,16 +1679,15 @@ define void @v_shuffle_v4p3_v2p3__3_0_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v2p3__3_0_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:4]
+; GFX900-NEXT: ; def v[2:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:2]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1748,13 +1736,13 @@ define void @v_shuffle_v4p3_v2p3__3_2_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[2:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:2]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1801,15 +1789,15 @@ define void @v_shuffle_v4p3_v2p3__3_3_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:2]
+; GFX900-NEXT: ; def v[2:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1855,15 +1843,15 @@ define void @v_shuffle_v4p3_v2p3__3_3_u_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v2p3__3_3_u_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[3:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1907,15 +1895,15 @@ define void @v_shuffle_v4p3_v2p3__3_3_0_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v2p3__3_3_0_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[3:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1962,14 +1950,13 @@ define void @v_shuffle_v4p3_v2p3__3_3_2_1(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:4]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2067,12 +2054,11 @@ define void @v_shuffle_v4p3_v2p3__1_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v2p3__1_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v2, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx4 v2, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v2, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2120,14 +2106,14 @@ define void @v_shuffle_v4p3_v2p3__3_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v2p3__3_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:2]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2167,13 +2153,13 @@ define void @v_shuffle_v4p3_v2p3__3_u_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v2p3__3_u_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v2, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v1, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v1, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx4 v2, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2214,15 +2200,15 @@ define void @v_shuffle_v4p3_v2p3__3_0_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:2]
+; GFX900-NEXT: ; def v[2:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2269,15 +2255,15 @@ define void @v_shuffle_v4p3_v2p3__3_1_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:1]
+; GFX900-NEXT: ; def v[1:2]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2323,14 +2309,14 @@ define void @v_shuffle_v4p3_v2p3__3_3_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v2p3__3_3_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2372,14 +2358,13 @@ define void @v_shuffle_v4p3_v2p3__3_3_u_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v2p3__3_3_u_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx4 v3, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2423,15 +2408,15 @@ define void @v_shuffle_v4p3_v2p3__3_3_0_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[3:4]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:4]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2481,15 +2466,15 @@ define void @v_shuffle_v4p3_v2p3__3_3_1_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:2]
+; GFX900-NEXT: ; def v[2:3]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:4]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2638,16 +2623,15 @@ define void @v_shuffle_v4p3_v2p3__1_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:2]
+; GFX900-NEXT: ; def v[2:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v3
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2742,13 +2726,13 @@ define void @v_shuffle_v4p3_v2p3__3_u_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v2p3__3_u_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:2]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx4 v2, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2789,15 +2773,15 @@ define void @v_shuffle_v4p3_v2p3__3_0_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:2]
+; GFX900-NEXT: ; def v[2:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2844,15 +2828,15 @@ define void @v_shuffle_v4p3_v2p3__3_1_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:1]
+; GFX900-NEXT: ; def v[1:2]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2898,13 +2882,14 @@ define void @v_shuffle_v4p3_v2p3__3_2_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v2p3__3_2_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:2]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2944,13 +2929,13 @@ define void @v_shuffle_v4p3_v2p3__3_3_u_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v2p3__3_3_u_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v2, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v2, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx4 v3, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2991,15 +2976,15 @@ define void @v_shuffle_v4p3_v2p3__3_3_0_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[3:4]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3046,15 +3031,15 @@ define void @v_shuffle_v4p3_v2p3__3_3_1_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:2]
+; GFX900-NEXT: ; def v[2:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3102,13 +3087,14 @@ define void @v_shuffle_v4p3_v2p3__3_3_2_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v2p3__3_3_2_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:3]
+; GFX900-NEXT: ; def v[0:1]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4p3.v3p3.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4p3.v3p3.ll
index bcb20e85b2e94..3253b4914420f 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4p3.v3p3.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4p3.v3p3.ll
@@ -58,12 +58,11 @@ define void @v_shuffle_v4p3_v3p3__1_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__1_u_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v3, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -100,36 +99,33 @@ define void @v_shuffle_v4p3_v3p3__2_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__2_u_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v3, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__2_u_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v3, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__2_u_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v3, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -153,12 +149,11 @@ define void @v_shuffle_v4p3_v3p3__4_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__4_u_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v3, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -196,36 +191,33 @@ define void @v_shuffle_v4p3_v3p3__5_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_u_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v3, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_u_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v3, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_u_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v3, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -239,48 +231,45 @@ define void @v_shuffle_v4p3_v3p3__5_0_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_0_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_0_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v0
-; GFX90A-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_0_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v0
-; GFX942-NEXT: global_store_dwordx4 v5, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -294,46 +283,43 @@ define void @v_shuffle_v4p3_v3p3__5_1_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_1_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
-; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_1_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
-; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_1_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
-; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx4 v5, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -347,16 +333,14 @@ define void @v_shuffle_v4p3_v3p3__5_2_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_2_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[1:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -364,15 +348,14 @@ define void @v_shuffle_v4p3_v3p3__5_2_u_u(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -380,15 +363,14 @@ define void @v_shuffle_v4p3_v3p3__5_2_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v5, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -404,37 +386,35 @@ define void @v_shuffle_v4p3_v3p3__5_3_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_3_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_3_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -448,36 +428,37 @@ define void @v_shuffle_v4p3_v3p3__5_4_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_4_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_4_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_4_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -491,39 +472,37 @@ define void @v_shuffle_v4p3_v3p3__5_5_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_5_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_5_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -537,51 +516,46 @@ define void @v_shuffle_v4p3_v3p3__5_5_0_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_5_0_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_5_0_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_0_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: global_store_dwordx4 v5, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -596,15 +570,14 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_u(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -612,16 +585,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_u(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: v_mov_b32_e32 v4, v1
-; GFX90A-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -629,17 +601,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_u(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: v_mov_b32_e32 v4, v1
-; GFX942-NEXT: global_store_dwordx4 v5, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -654,15 +624,14 @@ define void @v_shuffle_v4p3_v3p3__5_5_2_u(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -670,15 +639,14 @@ define void @v_shuffle_v4p3_v3p3__5_5_2_u(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v6
-; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -686,16 +654,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_2_u(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v6
-; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v5, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -711,40 +678,38 @@ define void @v_shuffle_v4p3_v3p3__5_5_3_u(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_5_3_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v2
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_3_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v2
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: global_store_dwordx4 v5, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -758,42 +723,40 @@ define void @v_shuffle_v4p3_v3p3__5_5_4_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_5_4_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_5_4_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v2
-; GFX90A-NEXT: v_mov_b32_e32 v6, v1
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_4_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v2
-; GFX942-NEXT: v_mov_b32_e32 v6, v1
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: global_store_dwordx4 v5, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -807,39 +770,40 @@ define void @v_shuffle_v4p3_v3p3__5_5_5_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_5_5_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_5_5_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_5_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: global_store_dwordx4 v5, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -853,50 +817,51 @@ define void @v_shuffle_v4p3_v3p3__5_5_5_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_5_5_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[5:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_5_5_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v9, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[6:8]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v6
+; GFX90A-NEXT: global_store_dwordx4 v9, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_5_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v9, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[6:8]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v6
+; GFX942-NEXT: global_store_dwordx4 v9, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -911,15 +876,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_5_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -927,15 +892,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_5_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -943,16 +908,16 @@ define void @v_shuffle_v4p3_v3p3__5_5_5_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -967,15 +932,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_5_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -983,16 +948,16 @@ define void @v_shuffle_v4p3_v3p3__5_5_5_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v6
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1000,16 +965,16 @@ define void @v_shuffle_v4p3_v3p3__5_5_5_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, v6
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1025,43 +990,41 @@ define void @v_shuffle_v4p3_v3p3__5_5_5_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_5_5_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v2
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v5, v0
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_5_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v2
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, v0
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1075,45 +1038,43 @@ define void @v_shuffle_v4p3_v3p3__5_5_5_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_5_5_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_5_5_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v2
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v1
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_5_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v2
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1127,42 +1088,43 @@ define void @v_shuffle_v4p3_v3p3__5_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_5_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_5_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1189,29 +1151,26 @@ define void @v_shuffle_v4p3_v3p3__u_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__u_0_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
+; GFX90A-NEXT: v_mov_b32_e32 v1, v2
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__u_0_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-NEXT: v_mov_b32_e32 v1, v2
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v0, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1273,42 +1232,39 @@ define void @v_shuffle_v4p3_v3p3__1_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__1_0_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[4:5] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__1_0_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[4:5] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1321,45 +1277,43 @@ define void @v_shuffle_v4p3_v3p3__2_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__2_0_0_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__2_0_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__2_0_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1385,29 +1339,26 @@ define void @v_shuffle_v4p3_v3p3__3_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__3_0_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
+; GFX90A-NEXT: v_mov_b32_e32 v1, v2
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__3_0_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-NEXT: v_mov_b32_e32 v1, v2
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v0, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1421,16 +1372,15 @@ define void @v_shuffle_v4p3_v3p3__4_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v3
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx4 v6, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1438,15 +1388,14 @@ define void @v_shuffle_v4p3_v3p3__4_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[4:5] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[2:3] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
@@ -1455,16 +1404,15 @@ define void @v_shuffle_v4p3_v3p3__4_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[0:1], v[4:5] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[2:3] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1480,16 +1428,15 @@ define void @v_shuffle_v4p3_v3p3__5_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1497,17 +1444,15 @@ define void @v_shuffle_v4p3_v3p3__5_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v0
-; GFX90A-NEXT: global_store_dwordx4 v5, v[6:9], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1515,17 +1460,15 @@ define void @v_shuffle_v4p3_v3p3__5_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v9, v0
-; GFX942-NEXT: global_store_dwordx4 v5, v[6:9], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1540,49 +1483,44 @@ define void @v_shuffle_v4p3_v3p3__5_u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v3, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_u_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: global_store_dwordx4 v5, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v3, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_u_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: global_store_dwordx4 v5, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v3, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1597,16 +1535,15 @@ define void @v_shuffle_v4p3_v3p3__5_1_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v5
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1614,17 +1551,15 @@ define void @v_shuffle_v4p3_v3p3__5_1_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v5
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v1
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v0
-; GFX90A-NEXT: global_store_dwordx4 v5, v[6:9], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1632,17 +1567,15 @@ define void @v_shuffle_v4p3_v3p3__5_1_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v9, v0
-; GFX942-NEXT: global_store_dwordx4 v5, v[6:9], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1657,16 +1590,15 @@ define void @v_shuffle_v4p3_v3p3__5_2_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v6
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1676,15 +1608,13 @@ define void @v_shuffle_v4p3_v3p3__5_2_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v6
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v2
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1694,15 +1624,13 @@ define void @v_shuffle_v4p3_v3p3__5_2_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v6
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v2
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1717,16 +1645,15 @@ define void @v_shuffle_v4p3_v3p3__5_3_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:6]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1734,17 +1661,15 @@ define void @v_shuffle_v4p3_v3p3__5_3_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v2
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v0
-; GFX90A-NEXT: global_store_dwordx4 v5, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1752,17 +1677,15 @@ define void @v_shuffle_v4p3_v3p3__5_3_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v2
-; GFX942-NEXT: v_mov_b32_e32 v9, v0
-; GFX942-NEXT: global_store_dwordx4 v5, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1777,15 +1700,15 @@ define void @v_shuffle_v4p3_v3p3__5_4_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1793,16 +1716,15 @@ define void @v_shuffle_v4p3_v3p3__5_4_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1810,16 +1732,15 @@ define void @v_shuffle_v4p3_v3p3__5_4_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
+; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1834,16 +1755,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1851,17 +1771,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v4
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v0
-; GFX90A-NEXT: global_store_dwordx4 v5, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1869,17 +1787,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v4
-; GFX942-NEXT: v_mov_b32_e32 v9, v0
-; GFX942-NEXT: global_store_dwordx4 v5, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1893,17 +1809,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_5_u_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[5:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1911,16 +1825,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_u_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v4
-; GFX90A-NEXT: v_mov_b32_e32 v9, v0
-; GFX90A-NEXT: global_store_dwordx4 v5, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1928,16 +1841,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_u_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v9, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v4
-; GFX942-NEXT: global_store_dwordx4 v5, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1951,17 +1863,16 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_5_1_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[5:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v6
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1969,16 +1880,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v4
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[4:5], v[4:5] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1986,17 +1896,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v4
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[4:5], v[4:5] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -2010,53 +1918,51 @@ define void @v_shuffle_v4p3_v3p3__5_5_2_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_5_2_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[5:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v7
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_5_2_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v9, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[6:8]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v8
; GFX90A-NEXT: v_mov_b32_e32 v5, v6
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v9, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_2_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v9, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[6:8]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v8
; GFX942-NEXT: v_mov_b32_e32 v5, v6
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v9, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -2070,53 +1976,51 @@ define void @v_shuffle_v4p3_v3p3__5_5_3_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_5_3_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:6]
+; GFX900-NEXT: ; def v[5:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_5_3_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v9, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[6:8]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v4
-; GFX90A-NEXT: v_mov_b32_e32 v8, v2
-; GFX90A-NEXT: v_mov_b32_e32 v9, v0
-; GFX90A-NEXT: global_store_dwordx4 v5, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v6
+; GFX90A-NEXT: global_store_dwordx4 v9, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_3_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v9, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[6:8]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v9, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v4
-; GFX942-NEXT: v_mov_b32_e32 v8, v2
-; GFX942-NEXT: global_store_dwordx4 v5, v[6:9], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v6
+; GFX942-NEXT: global_store_dwordx4 v9, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -2130,17 +2034,16 @@ define void @v_shuffle_v4p3_v3p3__5_5_4_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_5_4_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:6]
+; GFX900-NEXT: ; def v[5:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2148,16 +2051,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_4_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v4
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[0:1], v[4:5] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2165,17 +2067,16 @@ define void @v_shuffle_v4p3_v3p3__5_5_4_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v4
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[0:1], v[4:5] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -2282,11 +2183,11 @@ define void @v_shuffle_v4p3_v3p3__1_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v1
; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2327,42 +2228,43 @@ define void @v_shuffle_v4p3_v3p3__2_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__2_1_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__2_1_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__2_1_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -2421,16 +2323,15 @@ define void @v_shuffle_v4p3_v3p3__4_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v3
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2480,16 +2381,15 @@ define void @v_shuffle_v4p3_v3p3__5_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2497,16 +2397,15 @@ define void @v_shuffle_v4p3_v3p3__5_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2514,17 +2413,15 @@ define void @v_shuffle_v4p3_v3p3__5_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -2539,15 +2436,14 @@ define void @v_shuffle_v4p3_v3p3__5_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2555,16 +2451,14 @@ define void @v_shuffle_v4p3_v3p3__5_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v3, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2572,17 +2466,14 @@ define void @v_shuffle_v4p3_v3p3__5_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v3, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -2597,15 +2488,14 @@ define void @v_shuffle_v4p3_v3p3__5_0_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2613,17 +2503,15 @@ define void @v_shuffle_v4p3_v3p3__5_0_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v1
-; GFX90A-NEXT: v_mov_b32_e32 v9, v1
-; GFX90A-NEXT: global_store_dwordx4 v5, v[6:9], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2631,17 +2519,15 @@ define void @v_shuffle_v4p3_v3p3__5_0_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v8, v1
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
-; GFX942-NEXT: global_store_dwordx4 v5, v[6:9], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -2656,16 +2542,15 @@ define void @v_shuffle_v4p3_v3p3__5_2_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v5
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:6]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2675,15 +2560,13 @@ define void @v_shuffle_v4p3_v3p3__5_2_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v6
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v2
-; GFX90A-NEXT: v_mov_b32_e32 v6, v1
-; GFX90A-NEXT: v_mov_b32_e32 v7, v1
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2693,15 +2576,13 @@ define void @v_shuffle_v4p3_v3p3__5_2_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v6
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v2
-; GFX942-NEXT: v_mov_b32_e32 v6, v1
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -2718,14 +2599,13 @@ define void @v_shuffle_v4p3_v3p3__5_3_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
-; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2733,17 +2613,15 @@ define void @v_shuffle_v4p3_v3p3__5_3_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v2
-; GFX90A-NEXT: v_mov_b32_e32 v8, v1
-; GFX90A-NEXT: v_mov_b32_e32 v9, v1
-; GFX90A-NEXT: global_store_dwordx4 v5, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2751,17 +2629,15 @@ define void @v_shuffle_v4p3_v3p3__5_3_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, v1
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v2
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
-; GFX942-NEXT: global_store_dwordx4 v5, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -2776,15 +2652,15 @@ define void @v_shuffle_v4p3_v3p3__5_4_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2792,15 +2668,15 @@ define void @v_shuffle_v4p3_v3p3__5_4_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2808,16 +2684,15 @@ define void @v_shuffle_v4p3_v3p3__5_4_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
+; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -2832,16 +2707,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2849,17 +2723,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v4
-; GFX90A-NEXT: v_mov_b32_e32 v8, v1
-; GFX90A-NEXT: v_mov_b32_e32 v9, v1
-; GFX90A-NEXT: global_store_dwordx4 v5, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2867,17 +2739,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, v1
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v4
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
-; GFX942-NEXT: global_store_dwordx4 v5, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -2891,51 +2761,46 @@ define void @v_shuffle_v4p3_v3p3__5_5_u_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_5_u_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_5_u_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v4
-; GFX90A-NEXT: v_mov_b32_e32 v9, v1
-; GFX90A-NEXT: global_store_dwordx4 v5, v[6:9], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_u_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v4
-; GFX942-NEXT: global_store_dwordx4 v5, v[6:9], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -2949,52 +2814,46 @@ define void @v_shuffle_v4p3_v3p3__5_5_0_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_5_0_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_5_0_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v4
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v1
-; GFX90A-NEXT: global_store_dwordx4 v5, v[6:9], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_0_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v4
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
-; GFX942-NEXT: global_store_dwordx4 v5, v[6:9], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -3009,16 +2868,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_2_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v6
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3028,15 +2886,13 @@ define void @v_shuffle_v4p3_v3p3__5_5_2_1(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v6
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v1
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3046,15 +2902,13 @@ define void @v_shuffle_v4p3_v3p3__5_5_2_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v6
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -3069,16 +2923,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_3_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:6]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3086,17 +2939,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_3_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v4
-; GFX90A-NEXT: v_mov_b32_e32 v8, v2
-; GFX90A-NEXT: v_mov_b32_e32 v9, v1
-; GFX90A-NEXT: global_store_dwordx4 v5, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3104,17 +2955,16 @@ define void @v_shuffle_v4p3_v3p3__5_5_3_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v4
-; GFX942-NEXT: v_mov_b32_e32 v8, v2
-; GFX942-NEXT: global_store_dwordx4 v5, v[6:9], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -3129,16 +2979,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_4_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3146,17 +2995,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_4_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v4
-; GFX90A-NEXT: v_mov_b32_e32 v8, v3
-; GFX90A-NEXT: v_mov_b32_e32 v9, v1
-; GFX90A-NEXT: global_store_dwordx4 v5, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3164,17 +3011,16 @@ define void @v_shuffle_v4p3_v3p3__5_5_4_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v4
-; GFX942-NEXT: v_mov_b32_e32 v8, v3
-; GFX942-NEXT: global_store_dwordx4 v5, v[6:9], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -3188,13 +3034,13 @@ define void @v_shuffle_v4p3_v3p3__u_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__u_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3278,14 +3124,13 @@ define void @v_shuffle_v4p3_v3p3__1_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__1_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3324,42 +3169,43 @@ define void @v_shuffle_v4p3_v3p3__2_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__2_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__2_2_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__2_2_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -3372,13 +3218,13 @@ define void @v_shuffle_v4p3_v3p3__3_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__3_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3418,16 +3264,15 @@ define void @v_shuffle_v4p3_v3p3__4_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[1:3]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v3
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3474,17 +3319,16 @@ define void @v_shuffle_v4p3_v3p3__5_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[1:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3492,16 +3336,15 @@ define void @v_shuffle_v4p3_v3p3__5_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3509,16 +3352,15 @@ define void @v_shuffle_v4p3_v3p3__5_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -3532,48 +3374,46 @@ define void @v_shuffle_v4p3_v3p3__5_u_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_u_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_u_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_u_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -3588,15 +3428,14 @@ define void @v_shuffle_v4p3_v3p3__5_0_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v5
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:6]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3606,15 +3445,14 @@ define void @v_shuffle_v4p3_v3p3__5_0_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_mov_b32_e32 v4, v6
+; GFX90A-NEXT: v_mov_b32_e32 v5, v6
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v2
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3624,15 +3462,14 @@ define void @v_shuffle_v4p3_v3p3__5_0_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_mov_b32_e32 v4, v6
+; GFX942-NEXT: v_mov_b32_e32 v5, v6
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v2
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -3646,48 +3483,46 @@ define void @v_shuffle_v4p3_v3p3__5_1_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_1_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_1_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_1_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -3704,14 +3539,13 @@ define void @v_shuffle_v4p3_v3p3__5_3_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3719,16 +3553,15 @@ define void @v_shuffle_v4p3_v3p3__5_3_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3736,16 +3569,15 @@ define void @v_shuffle_v4p3_v3p3__5_3_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v4
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -3760,15 +3592,15 @@ define void @v_shuffle_v4p3_v3p3__5_4_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3778,14 +3610,13 @@ define void @v_shuffle_v4p3_v3p3__5_4_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3795,14 +3626,13 @@ define void @v_shuffle_v4p3_v3p3__5_4_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
+; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -3817,16 +3647,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3834,16 +3663,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v6
; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3851,16 +3679,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v6
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -3875,15 +3702,14 @@ define void @v_shuffle_v4p3_v3p3__5_5_u_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3891,16 +3717,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_u_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
-; GFX90A-NEXT: v_mov_b32_e32 v5, v6
-; GFX90A-NEXT: v_mov_b32_e32 v7, v2
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3908,16 +3733,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_u_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v6
-; GFX942-NEXT: v_mov_b32_e32 v7, v2
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -3932,16 +3756,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_0_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[5:7]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v6
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3951,15 +3774,13 @@ define void @v_shuffle_v4p3_v3p3__5_5_0_2(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v6
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
; GFX90A-NEXT: v_mov_b32_e32 v5, v6
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: v_mov_b32_e32 v7, v2
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3969,15 +3790,13 @@ define void @v_shuffle_v4p3_v3p3__5_5_0_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
; GFX942-NEXT: v_mov_b32_e32 v5, v6
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v2
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -3992,15 +3811,14 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:6]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4008,16 +3826,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[2:3] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v6
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[4:5] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4025,16 +3842,16 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[2:3] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v6
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[4:5] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -4049,53 +3866,49 @@ define void @v_shuffle_v4p3_v3p3__5_5_3_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:6]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_5_3_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, v6
-; GFX90A-NEXT: v_mov_b32_e32 v9, v6
-; GFX90A-NEXT: v_mov_b32_e32 v10, v4
-; GFX90A-NEXT: v_mov_b32_e32 v11, v2
-; GFX90A-NEXT: global_store_dwordx4 v3, v[8:11], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v6
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_3_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v6
-; GFX942-NEXT: v_mov_b32_e32 v9, v6
-; GFX942-NEXT: v_mov_b32_e32 v10, v4
-; GFX942-NEXT: v_mov_b32_e32 v11, v2
-; GFX942-NEXT: global_store_dwordx4 v3, v[8:11], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, v6
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -4110,16 +3923,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_4_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4127,16 +3939,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_4_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[2:3] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v6
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[0:1], v[4:5] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4144,17 +3955,16 @@ define void @v_shuffle_v4p3_v3p3__5_5_4_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[2:3] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v6
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[0:1], v[4:5] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -4218,12 +4028,11 @@ define void @v_shuffle_v4p3_v3p3__1_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__1_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v3, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4260,36 +4069,33 @@ define void @v_shuffle_v4p3_v3p3__2_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__2_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v3, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v3, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__2_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx4 v3, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v3, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__2_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx4 v3, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v3, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -4314,42 +4120,39 @@ define void @v_shuffle_v4p3_v3p3__4_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__4_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[4:5] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__4_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[4:5] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -4363,45 +4166,43 @@ define void @v_shuffle_v4p3_v3p3__5_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -4415,14 +4216,13 @@ define void @v_shuffle_v4p3_v3p3__5_u_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_u_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v3, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4464,15 +4264,15 @@ define void @v_shuffle_v4p3_v3p3__5_0_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4480,17 +4280,16 @@ define void @v_shuffle_v4p3_v3p3__5_0_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v2
-; GFX90A-NEXT: v_mov_b32_e32 v9, v2
-; GFX90A-NEXT: global_store_dwordx4 v5, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v0
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4498,17 +4297,16 @@ define void @v_shuffle_v4p3_v3p3__5_0_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v8, v2
-; GFX942-NEXT: v_mov_b32_e32 v9, v2
-; GFX942-NEXT: global_store_dwordx4 v5, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v0
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -4523,15 +4321,15 @@ define void @v_shuffle_v4p3_v3p3__5_1_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4539,16 +4337,15 @@ define void @v_shuffle_v4p3_v3p3__5_1_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v0
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4556,17 +4353,16 @@ define void @v_shuffle_v4p3_v3p3__5_1_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v0
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -4581,51 +4377,49 @@ define void @v_shuffle_v4p3_v3p3__5_2_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[1:3]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_2_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, v2
-; GFX90A-NEXT: v_mov_b32_e32 v8, v4
-; GFX90A-NEXT: v_mov_b32_e32 v9, v4
-; GFX90A-NEXT: global_store_dwordx4 v3, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v0
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_2_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v7, v2
-; GFX942-NEXT: v_mov_b32_e32 v8, v4
-; GFX942-NEXT: v_mov_b32_e32 v9, v4
-; GFX942-NEXT: global_store_dwordx4 v3, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v0
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -4641,43 +4435,41 @@ define void @v_shuffle_v4p3_v3p3__5_4_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_4_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v0
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_4_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v0
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -4693,43 +4485,41 @@ define void @v_shuffle_v4p3_v3p3__5_5_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_5_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v2
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v0
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v2
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v0
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -4743,43 +4533,40 @@ define void @v_shuffle_v4p3_v3p3__5_5_u_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_5_u_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_5_u_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v0
+; GFX90A-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_u_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v0
+; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -4794,15 +4581,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_0_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4810,17 +4597,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_0_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v4
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v2
-; GFX90A-NEXT: global_store_dwordx4 v5, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v0
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4828,17 +4613,16 @@ define void @v_shuffle_v4p3_v3p3__5_5_0_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v4
-; GFX942-NEXT: v_mov_b32_e32 v9, v2
-; GFX942-NEXT: global_store_dwordx4 v5, v[6:9], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v0
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -4853,15 +4637,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4869,16 +4653,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[2:3] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v4
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[0:1] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4886,17 +4669,16 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[2:3] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v4
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[0:1] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -4911,15 +4693,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_2_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4927,16 +4709,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_2_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v0
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4944,17 +4725,16 @@ define void @v_shuffle_v4p3_v3p3__5_5_2_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v0
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -4970,40 +4750,39 @@ define void @v_shuffle_v4p3_v3p3__5_5_4_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_5_4_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[2:3] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v4
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[0:1], v[0:1] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_4_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[2:3] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v4
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[0:1], v[0:1] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -5120,16 +4899,15 @@ define void @v_shuffle_v4p3_v3p3__1_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v3
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5179,16 +4957,15 @@ define void @v_shuffle_v4p3_v3p3__2_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5196,16 +4973,15 @@ define void @v_shuffle_v4p3_v3p3__2_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v5
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5213,17 +4989,15 @@ define void @v_shuffle_v4p3_v3p3__2_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -5286,11 +5060,11 @@ define void @v_shuffle_v4p3_v3p3__4_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v1
; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5332,42 +5106,43 @@ define void @v_shuffle_v4p3_v3p3__5_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_4_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_4_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_4_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -5381,13 +5156,13 @@ define void @v_shuffle_v4p3_v3p3__5_u_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_u_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v3, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5429,15 +5204,15 @@ define void @v_shuffle_v4p3_v3p3__5_0_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5445,17 +5220,16 @@ define void @v_shuffle_v4p3_v3p3__5_0_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v3
-; GFX90A-NEXT: v_mov_b32_e32 v9, v3
-; GFX90A-NEXT: global_store_dwordx4 v5, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5463,17 +5237,16 @@ define void @v_shuffle_v4p3_v3p3__5_0_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v8, v3
-; GFX942-NEXT: v_mov_b32_e32 v9, v3
-; GFX942-NEXT: global_store_dwordx4 v5, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -5488,15 +5261,15 @@ define void @v_shuffle_v4p3_v3p3__5_1_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5504,15 +5277,15 @@ define void @v_shuffle_v4p3_v3p3__5_1_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5520,16 +5293,16 @@ define void @v_shuffle_v4p3_v3p3__5_1_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -5544,51 +5317,49 @@ define void @v_shuffle_v4p3_v3p3__5_2_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[1:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_2_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, v2
-; GFX90A-NEXT: v_mov_b32_e32 v8, v5
-; GFX90A-NEXT: v_mov_b32_e32 v9, v5
-; GFX90A-NEXT: global_store_dwordx4 v3, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_2_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v7, v2
-; GFX942-NEXT: v_mov_b32_e32 v8, v5
-; GFX942-NEXT: v_mov_b32_e32 v9, v5
-; GFX942-NEXT: global_store_dwordx4 v3, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -5602,44 +5373,43 @@ define void @v_shuffle_v4p3_v3p3__5_3_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_3_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_3_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v1
-; GFX90A-NEXT: v_mov_b32_e32 v7, v1
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_3_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v1
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -5653,45 +5423,43 @@ define void @v_shuffle_v4p3_v3p3__5_5_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_5_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_5_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v2
-; GFX90A-NEXT: v_mov_b32_e32 v6, v1
-; GFX90A-NEXT: v_mov_b32_e32 v7, v1
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v2
-; GFX942-NEXT: v_mov_b32_e32 v6, v1
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -5705,43 +5473,40 @@ define void @v_shuffle_v4p3_v3p3__5_5_u_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_5_u_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_5_u_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v1
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_u_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -5756,16 +5521,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_0_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5773,17 +5537,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_0_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v4
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v3
-; GFX90A-NEXT: global_store_dwordx4 v5, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5791,17 +5553,16 @@ define void @v_shuffle_v4p3_v3p3__5_5_0_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v4
-; GFX942-NEXT: v_mov_b32_e32 v9, v3
-; GFX942-NEXT: global_store_dwordx4 v5, v[6:9], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -5816,16 +5577,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5833,17 +5593,16 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v4
-; GFX90A-NEXT: v_mov_b32_e32 v8, v1
-; GFX90A-NEXT: v_mov_b32_e32 v9, v3
-; GFX90A-NEXT: global_store_dwordx4 v5, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5851,17 +5610,16 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, v1
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v4
-; GFX942-NEXT: v_mov_b32_e32 v9, v3
-; GFX942-NEXT: global_store_dwordx4 v5, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -5876,16 +5634,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_2_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5893,16 +5650,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_2_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5910,17 +5666,16 @@ define void @v_shuffle_v4p3_v3p3__5_5_2_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -5934,44 +5689,43 @@ define void @v_shuffle_v4p3_v3p3__5_5_3_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_5_3_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_5_3_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v2
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: v_mov_b32_e32 v7, v1
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_3_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v2
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -5985,13 +5739,13 @@ define void @v_shuffle_v4p3_v3p3__u_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__u_5_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6090,16 +5844,15 @@ define void @v_shuffle_v4p3_v3p3__1_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v4
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6147,16 +5900,15 @@ define void @v_shuffle_v4p3_v3p3__2_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v5
+; GFX900-NEXT: v_mov_b32_e32 v4, v5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6164,16 +5916,16 @@ define void @v_shuffle_v4p3_v3p3__2_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v6
+; GFX90A-NEXT: v_mov_b32_e32 v4, v6
+; GFX90A-NEXT: v_mov_b32_e32 v5, v6
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6181,17 +5933,16 @@ define void @v_shuffle_v4p3_v3p3__2_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v6
+; GFX942-NEXT: v_mov_b32_e32 v4, v6
+; GFX942-NEXT: v_mov_b32_e32 v5, v6
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -6251,14 +6002,13 @@ define void @v_shuffle_v4p3_v3p3__4_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__4_5_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6298,39 +6048,40 @@ define void @v_shuffle_v4p3_v3p3__5_u_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_u_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v3, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v3, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_u_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v3, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v2
+; GFX90A-NEXT: global_store_dwordx4 v3, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_u_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v2
+; GFX942-NEXT: global_store_dwordx4 v3, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -6345,16 +6096,15 @@ define void @v_shuffle_v4p3_v3p3__5_0_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6362,16 +6112,16 @@ define void @v_shuffle_v4p3_v3p3__5_0_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v2
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6379,16 +6129,16 @@ define void @v_shuffle_v4p3_v3p3__5_0_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v2
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -6403,16 +6153,15 @@ define void @v_shuffle_v4p3_v3p3__5_1_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6420,16 +6169,15 @@ define void @v_shuffle_v4p3_v3p3__5_1_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6437,17 +6185,16 @@ define void @v_shuffle_v4p3_v3p3__5_1_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -6464,14 +6211,13 @@ define void @v_shuffle_v4p3_v3p3__5_2_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[1:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6481,14 +6227,14 @@ define void @v_shuffle_v4p3_v3p3__5_2_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6498,15 +6244,14 @@ define void @v_shuffle_v4p3_v3p3__5_2_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -6520,44 +6265,43 @@ define void @v_shuffle_v4p3_v3p3__5_3_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_3_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v0
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_3_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v2
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v5, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_3_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v2
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -6571,39 +6315,43 @@ define void @v_shuffle_v4p3_v3p3__5_4_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_4_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_4_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v1
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_4_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -6617,42 +6365,40 @@ define void @v_shuffle_v4p3_v3p3__5_5_u_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_5_u_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_5_u_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
; GFX90A-NEXT: v_mov_b32_e32 v5, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v2
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_u_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
; GFX942-NEXT: v_mov_b32_e32 v5, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v2
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -6667,16 +6413,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_0_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[4:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:6]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6684,17 +6429,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_0_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[4:6]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v7, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:4]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v4
-; GFX90A-NEXT: v_mov_b32_e32 v7, v4
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v4
-; GFX90A-NEXT: global_store_dwordx4 v5, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v2
+; GFX90A-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6702,17 +6445,16 @@ define void @v_shuffle_v4p3_v3p3__5_5_0_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[4:6]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v7, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:4]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v4
-; GFX942-NEXT: v_mov_b32_e32 v9, v4
-; GFX942-NEXT: global_store_dwordx4 v5, v[6:9], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v2
+; GFX942-NEXT: global_store_dwordx4 v7, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -6727,16 +6469,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6746,14 +6487,13 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_pk_mov_b32 v[6:7], v[2:3], v[2:3] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v2
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6763,15 +6503,14 @@ define void @v_shuffle_v4p3_v3p3__5_5_1_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[6:7], v[2:3], v[2:3] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v2
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -6786,16 +6525,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_2_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:2]
+; GFX900-NEXT: ; def v[2:4]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:5]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6803,16 +6541,15 @@ define void @v_shuffle_v4p3_v3p3__5_5_2_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:2]
+; GFX90A-NEXT: ; def v[2:4]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v7, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:6]
+; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v6
-; GFX90A-NEXT: v_mov_b32_e32 v3, v6
-; GFX90A-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6820,17 +6557,16 @@ define void @v_shuffle_v4p3_v3p3__5_5_2_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:2]
+; GFX942-NEXT: ; def v[2:4]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v7, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:6]
+; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v6
-; GFX942-NEXT: global_store_dwordx4 v7, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -6844,45 +6580,43 @@ define void @v_shuffle_v4p3_v3p3__5_5_3_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_5_3_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:4]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_5_3_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
; GFX90A-NEXT: v_mov_b32_e32 v5, v2
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: v_mov_b32_e32 v7, v2
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_3_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
; GFX942-NEXT: v_mov_b32_e32 v5, v2
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v2
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -6896,42 +6630,41 @@ define void @v_shuffle_v4p3_v3p3__5_5_4_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v3p3__5_5_4_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:3]
+; GFX900-NEXT: ; def v[0:2]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v3p3__5_5_4_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v3, 0
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:2]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_pk_mov_b32 v[6:7], v[0:1], v[2:3] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v5, v2
-; GFX90A-NEXT: global_store_dwordx4 v3, v[4:7], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[0:1], v[2:3] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v3p3__5_5_4_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:2]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[6:7], v[0:1], v[2:3] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v2
-; GFX942-NEXT: global_store_dwordx4 v3, v[4:7], s[0:1]
+; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[0:1], v[2:3] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr addrspace(3)> asm "; def $0", "=v"()
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4p3.v4p3.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4p3.v4p3.ll
index 1684b94cfd452..9672a7d0c0e8e 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4p3.v4p3.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4p3.v4p3.ll
@@ -58,12 +58,11 @@ define void @v_shuffle_v4p3_v4p3__1_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__1_u_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v4, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -100,36 +99,33 @@ define void @v_shuffle_v4p3_v4p3__2_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__2_u_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v4p3__2_u_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v4p3__2_u_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -142,12 +138,11 @@ define void @v_shuffle_v4p3_v4p3__3_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__3_u_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v4, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -195,12 +190,11 @@ define void @v_shuffle_v4p3_v4p3__5_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__5_u_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v4, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -238,36 +232,33 @@ define void @v_shuffle_v4p3_v4p3__6_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__6_u_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v4p3__6_u_u_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v4p3__6_u_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -281,12 +272,11 @@ define void @v_shuffle_v4p3_v4p3__7_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_u_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v4, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -324,16 +314,14 @@ define void @v_shuffle_v4p3_v4p3__7_0_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_0_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -378,15 +366,14 @@ define void @v_shuffle_v4p3_v4p3__7_1_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_1_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -431,16 +418,14 @@ define void @v_shuffle_v4p3_v4p3__7_2_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_2_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -485,16 +470,14 @@ define void @v_shuffle_v4p3_v4p3__7_3_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_3_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v5, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -508,9 +491,8 @@ define void @v_shuffle_v4p3_v4p3__7_3_u_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v7
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -524,9 +506,9 @@ define void @v_shuffle_v4p3_v4p3__7_3_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v7
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -540,13 +522,12 @@ define void @v_shuffle_v4p3_v4p3__7_4_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_4_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: global_store_dwordx4 v4, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: global_store_dwordx4 v5, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -584,12 +565,12 @@ define void @v_shuffle_v4p3_v4p3__7_5_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_5_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx4 v5, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -627,13 +608,12 @@ define void @v_shuffle_v4p3_v4p3__7_6_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_6_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx4 v5, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -671,13 +651,12 @@ define void @v_shuffle_v4p3_v4p3__7_7_u_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_7_u_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx4 v5, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -688,9 +667,8 @@ define void @v_shuffle_v4p3_v4p3__7_7_u_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v3
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -701,9 +679,8 @@ define void @v_shuffle_v4p3_v4p3__7_7_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v3
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -717,17 +694,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_0_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_7_0_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v9, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -737,14 +712,12 @@ define void @v_shuffle_v4p3_v4p3__7_7_0_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -754,15 +727,12 @@ define void @v_shuffle_v4p3_v4p3__7_7_0_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -777,16 +747,14 @@ define void @v_shuffle_v4p3_v4p3__7_7_1_u(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -794,16 +762,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_1_u(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -811,17 +778,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_1_u(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -836,15 +801,14 @@ define void @v_shuffle_v4p3_v4p3__7_7_2_u(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -852,15 +816,14 @@ define void @v_shuffle_v4p3_v4p3__7_7_2_u(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -868,16 +831,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_2_u(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -892,16 +854,14 @@ define void @v_shuffle_v4p3_v4p3__7_7_3_u(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx4 v6, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -909,16 +869,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_3_u(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -926,16 +885,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_3_u(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -949,43 +907,39 @@ define void @v_shuffle_v4p3_v4p3__7_7_4_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_7_4_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx4 v4, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v6, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v4p3__7_7_4_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v3
-; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_4_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v3
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: global_store_dwordx4 v5, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -999,42 +953,39 @@ define void @v_shuffle_v4p3_v4p3__7_7_5_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_7_5_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v4, v1
-; GFX900-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v4p3__7_7_5_u:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v3
-; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: v_mov_b32_e32 v8, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_5_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v3
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: v_mov_b32_e32 v8, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: global_store_dwordx4 v5, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1048,13 +999,13 @@ define void @v_shuffle_v4p3_v4p3__7_7_6_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_7_6_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v6, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1094,14 +1045,13 @@ define void @v_shuffle_v4p3_v4p3__7_7_7_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_7_7_u:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v6, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1111,11 +1061,10 @@ define void @v_shuffle_v4p3_v4p3__7_7_7_u(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v3
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: global_store_dwordx4 v5, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1125,11 +1074,10 @@ define void @v_shuffle_v4p3_v4p3__7_7_7_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v3
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: global_store_dwordx4 v5, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1143,18 +1091,16 @@ define void @v_shuffle_v4p3_v4p3__7_7_7_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_7_7_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[6:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v10, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1162,16 +1108,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_7_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[0:1] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v0, v5
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[4:5] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1179,17 +1124,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_7_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[0:1] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v0, v5
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[4:5] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1204,17 +1147,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_7_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v5
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v9, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1222,17 +1163,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_7_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v5
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1240,18 +1179,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_7_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v5
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1266,17 +1202,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_7_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v6
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v6
-; GFX900-NEXT: v_mov_b32_e32 v6, v2
-; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1284,16 +1218,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_7_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[6:7], v[2:3] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[4:5] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1301,17 +1234,16 @@ define void @v_shuffle_v4p3_v4p3__7_7_7_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[6:7], v[2:3] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[4:5] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1326,16 +1258,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_7_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1343,16 +1274,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_7_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: v_mov_b32_e32 v2, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1360,17 +1290,16 @@ define void @v_shuffle_v4p3_v4p3__7_7_7_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: v_mov_b32_e32 v2, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1384,14 +1313,14 @@ define void @v_shuffle_v4p3_v4p3__7_7_7_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_7_7_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1431,46 +1360,42 @@ define void @v_shuffle_v4p3_v4p3__7_7_7_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_7_7_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v4, v3
; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: v_mov_b32_e32 v6, v3
-; GFX900-NEXT: v_mov_b32_e32 v7, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v1
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v4p3__7_7_7_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v3
-; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: v_mov_b32_e32 v8, v3
-; GFX90A-NEXT: v_mov_b32_e32 v9, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_7_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v3
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: v_mov_b32_e32 v8, v3
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1484,15 +1409,14 @@ define void @v_shuffle_v4p3_v4p3__7_7_7_6(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_7_7_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v4, v3
; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: v_mov_b32_e32 v6, v3
-; GFX900-NEXT: v_mov_b32_e32 v7, v2
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1532,14 +1456,14 @@ define void @v_shuffle_v4p3_v4p3__7_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_7_7_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v3
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1549,11 +1473,11 @@ define void @v_shuffle_v4p3_v4p3__7_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v3
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1563,11 +1487,11 @@ define void @v_shuffle_v4p3_v4p3__7_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v3
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1582,13 +1506,12 @@ define void @v_shuffle_v4p3_v4p3__u_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1596,13 +1519,12 @@ define void @v_shuffle_v4p3_v4p3__u_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v0
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
+; GFX90A-NEXT: v_mov_b32_e32 v1, v2
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1610,13 +1532,12 @@ define void @v_shuffle_v4p3_v4p3__u_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v0
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-NEXT: v_mov_b32_e32 v1, v2
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v0, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1692,13 +1613,12 @@ define void @v_shuffle_v4p3_v4p3__1_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1706,13 +1626,12 @@ define void @v_shuffle_v4p3_v4p3__1_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1728,43 +1647,39 @@ define void @v_shuffle_v4p3_v4p3__2_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: v_mov_b32_e32 v3, v0
; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v4p3__2_0_0_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v0
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v0
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v4p3__2_0_0_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v9, v0
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v0
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1777,15 +1692,14 @@ define void @v_shuffle_v4p3_v4p3__3_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__3_0_0_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1793,13 +1707,12 @@ define void @v_shuffle_v4p3_v4p3__3_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[2:3] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1807,13 +1720,12 @@ define void @v_shuffle_v4p3_v4p3__3_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[2:3] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1827,13 +1739,12 @@ define void @v_shuffle_v4p3_v4p3__4_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v0
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v0, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: v_mov_b32_e32 v3, v1
+; GFX900-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1841,13 +1752,12 @@ define void @v_shuffle_v4p3_v4p3__4_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v0
-; GFX90A-NEXT: v_mov_b32_e32 v3, v0
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, 0
+; GFX90A-NEXT: v_mov_b32_e32 v1, v2
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v0, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1855,13 +1765,12 @@ define void @v_shuffle_v4p3_v4p3__4_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v0
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-NEXT: v_mov_b32_e32 v1, v2
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v0, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1877,15 +1786,13 @@ define void @v_shuffle_v4p3_v4p3__5_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v2, v4
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
+; GFX900-NEXT: global_store_dwordx4 v8, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1893,16 +1800,15 @@ define void @v_shuffle_v4p3_v4p3__5_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[2:3] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1910,17 +1816,16 @@ define void @v_shuffle_v4p3_v4p3__5_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[2:3] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1935,17 +1840,15 @@ define void @v_shuffle_v4p3_v4p3__6_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -1953,17 +1856,15 @@ define void @v_shuffle_v4p3_v4p3__6_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v0
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -1971,18 +1872,15 @@ define void @v_shuffle_v4p3_v4p3__6_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v0
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -1997,17 +1895,15 @@ define void @v_shuffle_v4p3_v4p3__7_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2015,16 +1911,15 @@ define void @v_shuffle_v4p3_v4p3__7_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[0:1] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[2:3] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2032,17 +1927,16 @@ define void @v_shuffle_v4p3_v4p3__7_0_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[0:1] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[2:3] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -2057,16 +1951,14 @@ define void @v_shuffle_v4p3_v4p3__7_u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v4, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2074,16 +1966,15 @@ define void @v_shuffle_v4p3_v4p3__7_u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v1, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v7
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v1, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2091,17 +1982,15 @@ define void @v_shuffle_v4p3_v4p3__7_u_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v1, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_mov_b32_e32 v0, v7
+; GFX942-NEXT: global_store_dwordx4 v1, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -2116,17 +2005,15 @@ define void @v_shuffle_v4p3_v4p3__7_1_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v6
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v0
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v9, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2134,17 +2021,16 @@ define void @v_shuffle_v4p3_v4p3__7_1_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v7
+; GFX90A-NEXT: v_mov_b32_e32 v1, v3
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2152,18 +2038,16 @@ define void @v_shuffle_v4p3_v4p3__7_1_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: v_mov_b32_e32 v0, v7
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -2178,16 +2062,15 @@ define void @v_shuffle_v4p3_v4p3__7_2_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v7
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v7, v[1:4], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v9, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2195,16 +2078,15 @@ define void @v_shuffle_v4p3_v4p3__7_2_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[6:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[6:7], v[2:3] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[8:9], v[4:5] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2212,16 +2094,15 @@ define void @v_shuffle_v4p3_v4p3__7_2_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[6:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[6:7], v[2:3] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[8:9], v[4:5] op_sel:[1,0]
+; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -2236,17 +2117,15 @@ define void @v_shuffle_v4p3_v4p3__7_3_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v8
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v8, v[1:4], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v9, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2295,17 +2174,15 @@ define void @v_shuffle_v4p3_v4p3__7_4_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v0
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
+; GFX900-NEXT: global_store_dwordx4 v9, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2313,16 +2190,15 @@ define void @v_shuffle_v4p3_v4p3__7_4_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[2:3] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[4:5] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2330,17 +2206,15 @@ define void @v_shuffle_v4p3_v4p3__7_4_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[2:3] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[4:5] op_sel:[1,0]
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -2355,16 +2229,15 @@ define void @v_shuffle_v4p3_v4p3__7_5_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
+; GFX900-NEXT: global_store_dwordx4 v9, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2414,17 +2287,15 @@ define void @v_shuffle_v4p3_v4p3__7_6_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
+; GFX900-NEXT: global_store_dwordx4 v9, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2432,16 +2303,15 @@ define void @v_shuffle_v4p3_v4p3__7_6_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[4:5] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[6:7] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2449,17 +2319,15 @@ define void @v_shuffle_v4p3_v4p3__7_6_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[4:5] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[6:7] op_sel:[1,0]
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -2474,17 +2342,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_0_0(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
+; GFX900-NEXT: global_store_dwordx4 v9, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2492,17 +2358,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_0_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2510,18 +2374,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_0_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -2535,17 +2396,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_7_u_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[6:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v5, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2553,16 +2412,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_u_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2570,17 +2428,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_u_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -2594,18 +2450,16 @@ define void @v_shuffle_v4p3_v4p3__7_7_1_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_7_1_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[6:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v1
-; GFX900-NEXT: v_mov_b32_e32 v5, v0
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v7
+; GFX900-NEXT: global_store_dwordx4 v10, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2613,16 +2467,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_1_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v0, v5
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[4:5], v[4:5] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2630,17 +2483,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_1_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v0, v5
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[4:5], v[4:5] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -2654,18 +2505,16 @@ define void @v_shuffle_v4p3_v4p3__7_7_2_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_7_2_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[6:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v6
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v2
-; GFX900-NEXT: v_mov_b32_e32 v6, v0
-; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v8
+; GFX900-NEXT: global_store_dwordx4 v10, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2673,17 +2522,16 @@ define void @v_shuffle_v4p3_v4p3__7_7_2_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[6:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v7
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v8
+; GFX90A-NEXT: v_mov_b32_e32 v5, v6
+; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2691,17 +2539,16 @@ define void @v_shuffle_v4p3_v4p3__7_7_2_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[6:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, v7
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v8
+; GFX942-NEXT: v_mov_b32_e32 v5, v6
+; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -2715,17 +2562,16 @@ define void @v_shuffle_v4p3_v4p3__7_7_3_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_7_3_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[6:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v7
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v8, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v9
+; GFX900-NEXT: global_store_dwordx4 v10, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2733,16 +2579,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_3_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[6:7], v[4:5] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2750,16 +2595,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_3_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[6:7], v[4:5] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -2773,18 +2617,16 @@ define void @v_shuffle_v4p3_v4p3__7_7_4_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_7_4_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[6:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: v_mov_b32_e32 v4, v1
-; GFX900-NEXT: v_mov_b32_e32 v5, v0
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v10, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2794,15 +2636,14 @@ define void @v_shuffle_v4p3_v4p3__7_7_4_0(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[6:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, v5
-; GFX90A-NEXT: v_mov_b32_e32 v9, v5
-; GFX90A-NEXT: v_mov_b32_e32 v10, v2
-; GFX90A-NEXT: v_mov_b32_e32 v11, v0
-; GFX90A-NEXT: global_store_dwordx4 v6, v[8:11], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v6
+; GFX90A-NEXT: global_store_dwordx4 v10, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2812,15 +2653,14 @@ define void @v_shuffle_v4p3_v4p3__7_7_4_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[6:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v11, v0
-; GFX942-NEXT: v_mov_b32_e32 v8, v5
-; GFX942-NEXT: v_mov_b32_e32 v9, v5
-; GFX942-NEXT: v_mov_b32_e32 v10, v2
-; GFX942-NEXT: global_store_dwordx4 v6, v[8:11], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v6
+; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -2834,17 +2674,16 @@ define void @v_shuffle_v4p3_v4p3__7_7_5_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_7_5_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[6:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: v_mov_b32_e32 v5, v2
-; GFX900-NEXT: v_mov_b32_e32 v6, v0
-; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v10, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -2852,16 +2691,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_5_0(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v0, v5
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[0:1], v[4:5] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -2869,17 +2707,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_5_0(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v0, v5
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[0:1], v[4:5] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -2893,17 +2729,16 @@ define void @v_shuffle_v4p3_v4p3__7_7_6_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_7_6_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v10, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[6:9]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v10, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3045,11 +2880,11 @@ define void @v_shuffle_v4p3_v4p3__1_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v1
; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3093,11 +2928,11 @@ define void @v_shuffle_v4p3_v4p3__2_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3107,11 +2942,11 @@ define void @v_shuffle_v4p3_v4p3__2_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3121,11 +2956,11 @@ define void @v_shuffle_v4p3_v4p3__2_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -3138,14 +2973,14 @@ define void @v_shuffle_v4p3_v4p3__3_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__3_1_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: v_mov_b32_e32 v6, v1
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3232,16 +3067,15 @@ define void @v_shuffle_v4p3_v4p3__5_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v4
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
+; GFX900-NEXT: global_store_dwordx4 v7, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3291,16 +3125,15 @@ define void @v_shuffle_v4p3_v4p3__6_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3308,16 +3141,15 @@ define void @v_shuffle_v4p3_v4p3__6_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v4
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v5
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3325,17 +3157,15 @@ define void @v_shuffle_v4p3_v4p3__6_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -3350,16 +3180,15 @@ define void @v_shuffle_v4p3_v4p3__7_1_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3409,16 +3238,14 @@ define void @v_shuffle_v4p3_v4p3__7_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3426,16 +3253,15 @@ define void @v_shuffle_v4p3_v4p3__7_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v1, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v5
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v7
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx4 v1, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3443,17 +3269,15 @@ define void @v_shuffle_v4p3_v4p3__7_u_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v1, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v5
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v0, v7
+; GFX942-NEXT: global_store_dwordx4 v1, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -3468,17 +3292,14 @@ define void @v_shuffle_v4p3_v4p3__7_0_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: v_mov_b32_e32 v4, v1
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3486,16 +3307,15 @@ define void @v_shuffle_v4p3_v4p3__7_0_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[0:1] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v4, v1
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[2:3] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3503,17 +3323,16 @@ define void @v_shuffle_v4p3_v4p3__7_0_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[0:1] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v4, v1
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[2:3] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -3528,17 +3347,15 @@ define void @v_shuffle_v4p3_v4p3__7_2_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v6
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v6
-; GFX900-NEXT: v_mov_b32_e32 v4, v2
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: v_mov_b32_e32 v6, v1
-; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3546,16 +3363,15 @@ define void @v_shuffle_v4p3_v4p3__7_2_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v10, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[6:9]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[6:7], v[2:3] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v4, v1
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[8:9], v[4:5] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx4 v10, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3563,16 +3379,15 @@ define void @v_shuffle_v4p3_v4p3__7_2_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[6:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[6:7], v[2:3] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v4, v1
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[8:9], v[4:5] op_sel:[1,0]
+; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -3587,16 +3402,15 @@ define void @v_shuffle_v4p3_v4p3__7_3_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v7
-; GFX900-NEXT: v_mov_b32_e32 v4, v1
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v7
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3645,17 +3459,15 @@ define void @v_shuffle_v4p3_v4p3__7_4_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v2
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: v_mov_b32_e32 v6, v1
-; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3663,16 +3475,15 @@ define void @v_shuffle_v4p3_v4p3__7_4_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[2:3] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v4, v1
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[4:5] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3680,17 +3491,15 @@ define void @v_shuffle_v4p3_v4p3__7_4_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[2:3] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v4, v1
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[4:5] op_sel:[1,0]
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -3705,16 +3514,15 @@ define void @v_shuffle_v4p3_v4p3__7_5_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
; GFX900-NEXT: v_mov_b32_e32 v4, v1
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3764,17 +3572,15 @@ define void @v_shuffle_v4p3_v4p3__7_6_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: v_mov_b32_e32 v4, v1
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3782,16 +3588,15 @@ define void @v_shuffle_v4p3_v4p3__7_6_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[4:5] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v4, v1
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[6:7] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3799,17 +3604,15 @@ define void @v_shuffle_v4p3_v4p3__7_6_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[4:5] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v4, v1
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[6:7] op_sel:[1,0]
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -3824,17 +3627,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_1_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v1
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3842,17 +3643,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_1_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v1
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3860,18 +3659,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_1_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v1
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -3885,17 +3681,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_u_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_7_u_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v9, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3905,14 +3699,12 @@ define void @v_shuffle_v4p3_v4p3__7_7_u_1(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3922,15 +3714,12 @@ define void @v_shuffle_v4p3_v4p3__7_7_u_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -3944,18 +3733,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_0_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_7_0_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v9, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -3965,15 +3751,12 @@ define void @v_shuffle_v4p3_v4p3__7_7_0_1(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -3983,16 +3766,12 @@ define void @v_shuffle_v4p3_v4p3__7_7_0_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -4007,17 +3786,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_2_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v6
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v2
-; GFX900-NEXT: v_mov_b32_e32 v6, v1
-; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v7
+; GFX900-NEXT: global_store_dwordx4 v9, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4027,15 +3804,13 @@ define void @v_shuffle_v4p3_v4p3__7_7_2_1(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v7
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v6
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4045,15 +3820,13 @@ define void @v_shuffle_v4p3_v4p3__7_7_2_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, v7
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v6
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -4068,17 +3841,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_3_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v7
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: v_mov_b32_e32 v6, v3
-; GFX900-NEXT: v_mov_b32_e32 v7, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v8
+; GFX900-NEXT: global_store_dwordx4 v9, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4088,15 +3859,13 @@ define void @v_shuffle_v4p3_v4p3__7_7_3_1(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
; GFX90A-NEXT: v_mov_b32_e32 v4, v7
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: v_mov_b32_e32 v6, v3
-; GFX90A-NEXT: v_mov_b32_e32 v7, v1
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4106,15 +3875,13 @@ define void @v_shuffle_v4p3_v4p3__7_7_3_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
; GFX942-NEXT: v_mov_b32_e32 v4, v7
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: v_mov_b32_e32 v6, v3
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -4129,17 +3896,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_4_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v5
-; GFX900-NEXT: v_mov_b32_e32 v5, v2
-; GFX900-NEXT: v_mov_b32_e32 v6, v1
-; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v9, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4147,17 +3912,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_4_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, v5
-; GFX90A-NEXT: v_mov_b32_e32 v9, v5
-; GFX90A-NEXT: v_mov_b32_e32 v10, v2
-; GFX90A-NEXT: v_mov_b32_e32 v11, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[8:11], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4165,17 +3928,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_4_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v11, v1
-; GFX942-NEXT: v_mov_b32_e32 v8, v5
-; GFX942-NEXT: v_mov_b32_e32 v9, v5
-; GFX942-NEXT: v_mov_b32_e32 v10, v2
-; GFX942-NEXT: global_store_dwordx4 v6, v[8:11], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -4190,16 +3951,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_5_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v5
-; GFX900-NEXT: v_mov_b32_e32 v6, v3
-; GFX900-NEXT: v_mov_b32_e32 v7, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v9, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4207,17 +3967,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_5_1(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, v5
-; GFX90A-NEXT: v_mov_b32_e32 v9, v5
-; GFX90A-NEXT: v_mov_b32_e32 v10, v3
-; GFX90A-NEXT: v_mov_b32_e32 v11, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[8:11], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4225,17 +3983,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_5_1(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v11, v1
-; GFX942-NEXT: v_mov_b32_e32 v8, v5
-; GFX942-NEXT: v_mov_b32_e32 v9, v5
-; GFX942-NEXT: v_mov_b32_e32 v10, v3
-; GFX942-NEXT: global_store_dwordx4 v6, v[8:11], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -4250,16 +4006,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_6_1(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v9, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4311,10 +4066,10 @@ define void @v_shuffle_v4p3_v4p3__u_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4401,11 +4156,10 @@ define void @v_shuffle_v4p3_v4p3__1_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4447,11 +4201,11 @@ define void @v_shuffle_v4p3_v4p3__2_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4461,11 +4215,11 @@ define void @v_shuffle_v4p3_v4p3__2_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4475,11 +4229,11 @@ define void @v_shuffle_v4p3_v4p3__2_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -4492,14 +4246,14 @@ define void @v_shuffle_v4p3_v4p3__3_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__3_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4541,10 +4295,10 @@ define void @v_shuffle_v4p3_v4p3__4_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4584,16 +4338,15 @@ define void @v_shuffle_v4p3_v4p3__5_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v4
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v4
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
+; GFX900-NEXT: global_store_dwordx4 v6, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4641,16 +4394,15 @@ define void @v_shuffle_v4p3_v4p3__6_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4658,16 +4410,15 @@ define void @v_shuffle_v4p3_v4p3__6_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
-; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -4675,16 +4426,15 @@ define void @v_shuffle_v4p3_v4p3__6_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -4699,16 +4449,15 @@ define void @v_shuffle_v4p3_v4p3__7_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4756,19 +4505,18 @@ define void @v_shuffle_v4p3_v4p3__7_u_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
-; GFX900-NEXT: s_waitcnt vmcnt(0)
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX90A-LABEL: v_shuffle_v4p3_v4p3__7_u_2_2:
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: v_shuffle_v4p3_v4p3__7_u_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -4811,17 +4559,14 @@ define void @v_shuffle_v4p3_v4p3__7_0_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v5, v6
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v6
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v2
-; GFX900-NEXT: v_mov_b32_e32 v6, v2
-; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4868,15 +4613,14 @@ define void @v_shuffle_v4p3_v4p3__7_1_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4923,16 +4667,15 @@ define void @v_shuffle_v4p3_v4p3__7_3_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v6
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -4981,16 +4724,15 @@ define void @v_shuffle_v4p3_v4p3__7_4_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5037,15 +4779,14 @@ define void @v_shuffle_v4p3_v4p3__7_5_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v2
-; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
@@ -5095,16 +4836,15 @@ define void @v_shuffle_v4p3_v4p3__7_6_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5151,16 +4891,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_2_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5168,16 +4907,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_2_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5185,16 +4923,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -5209,16 +4946,14 @@ define void @v_shuffle_v4p3_v4p3__7_7_u_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5226,16 +4961,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_u_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5243,16 +4977,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_u_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -5267,17 +5000,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_0_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v6
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v0
-; GFX900-NEXT: v_mov_b32_e32 v6, v2
-; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v7
+; GFX900-NEXT: global_store_dwordx4 v9, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5287,15 +5018,13 @@ define void @v_shuffle_v4p3_v4p3__7_7_0_2(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v7
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: v_mov_b32_e32 v7, v2
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v6
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5305,15 +5034,13 @@ define void @v_shuffle_v4p3_v4p3__7_7_0_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, v7
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v2
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v6
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -5328,17 +5055,14 @@ define void @v_shuffle_v4p3_v4p3__7_7_1_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v6
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: v_mov_b32_e32 v6, v2
-; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5346,16 +5070,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_1_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[2:3] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[4:5], v[6:7] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5363,16 +5086,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_1_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[2:3] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[4:5], v[6:7] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -5393,11 +5115,9 @@ define void @v_shuffle_v4p3_v4p3__7_7_3_2(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v7
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: v_mov_b32_e32 v6, v3
-; GFX900-NEXT: v_mov_b32_e32 v7, v2
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5405,16 +5125,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_3_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[2:3] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[4:5], v[4:5] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5422,16 +5141,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_3_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[2:3] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[4:5], v[4:5] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -5446,54 +5164,49 @@ define void @v_shuffle_v4p3_v4p3__7_7_4_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v5, v6
-; GFX900-NEXT: v_mov_b32_e32 v6, v3
-; GFX900-NEXT: v_mov_b32_e32 v7, v2
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v4p3__7_7_4_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v10, v7
-; GFX90A-NEXT: v_mov_b32_e32 v11, v7
-; GFX90A-NEXT: v_mov_b32_e32 v12, v4
-; GFX90A-NEXT: v_mov_b32_e32 v13, v2
-; GFX90A-NEXT: global_store_dwordx4 v8, v[10:13], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v6
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_4_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v7
-; GFX942-NEXT: v_mov_b32_e32 v11, v7
-; GFX942-NEXT: v_mov_b32_e32 v12, v4
-; GFX942-NEXT: v_mov_b32_e32 v13, v2
-; GFX942-NEXT: global_store_dwordx4 v8, v[10:13], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v6
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -5508,16 +5221,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_5_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v9, 0
-; GFX900-NEXT: v_mov_b32_e32 v5, v6
-; GFX900-NEXT: v_mov_b32_e32 v7, v4
-; GFX900-NEXT: v_mov_b32_e32 v8, v2
-; GFX900-NEXT: global_store_dwordx4 v9, v[5:8], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5525,16 +5237,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_5_2(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[2:3] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[0:1], v[4:5] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5542,17 +5253,16 @@ define void @v_shuffle_v4p3_v4p3__7_7_5_2(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[4:5], v[2:3] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[0:1], v[4:5] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -5567,16 +5277,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_6_2(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v6
-; GFX900-NEXT: v_mov_b32_e32 v4, v6
-; GFX900-NEXT: v_mov_b32_e32 v6, v2
-; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5624,39 +5333,40 @@ define void @v_shuffle_v4p3_v4p3__u_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__u_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v4p3__u_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v4p3__u_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -5717,11 +5427,10 @@ define void @v_shuffle_v4p3_v4p3__1_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5731,11 +5440,11 @@ define void @v_shuffle_v4p3_v4p3__1_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5745,11 +5454,11 @@ define void @v_shuffle_v4p3_v4p3__1_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -5762,42 +5471,40 @@ define void @v_shuffle_v4p3_v4p3__2_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__2_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v4p3__2_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v4p3__2_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -5810,14 +5517,14 @@ define void @v_shuffle_v4p3_v4p3__3_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__3_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v3
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5827,11 +5534,11 @@ define void @v_shuffle_v4p3_v4p3__3_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v3
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5841,11 +5548,11 @@ define void @v_shuffle_v4p3_v4p3__3_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v3
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -5858,39 +5565,40 @@ define void @v_shuffle_v4p3_v4p3__4_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__4_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v4p3__4_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v4p3__4_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -5904,16 +5612,15 @@ define void @v_shuffle_v4p3_v4p3__5_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v2, v4
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5923,14 +5630,14 @@ define void @v_shuffle_v4p3_v4p3__5_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v5
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v5
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5940,14 +5647,14 @@ define void @v_shuffle_v4p3_v4p3__5_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, v5
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v5
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -5962,16 +5669,15 @@ define void @v_shuffle_v4p3_v4p3__6_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -5979,16 +5685,15 @@ define void @v_shuffle_v4p3_v4p3__6_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v6
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v5
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -5996,16 +5701,15 @@ define void @v_shuffle_v4p3_v4p3__6_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
+; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -6019,17 +5723,16 @@ define void @v_shuffle_v4p3_v4p3__7_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v5, v4
+; GFX900-NEXT: v_mov_b32_e32 v6, v4
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6039,14 +5742,14 @@ define void @v_shuffle_v4p3_v4p3__7_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v7
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6056,14 +5759,14 @@ define void @v_shuffle_v4p3_v4p3__7_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v7
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -6077,16 +5780,15 @@ define void @v_shuffle_v4p3_v4p3__7_u_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_u_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6136,14 +5838,12 @@ define void @v_shuffle_v4p3_v4p3__7_0_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v5, v7
+; GFX900-NEXT: v_mov_b32_e32 v6, v7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v7
-; GFX900-NEXT: v_mov_b32_e32 v5, v0
-; GFX900-NEXT: v_mov_b32_e32 v6, v3
-; GFX900-NEXT: v_mov_b32_e32 v7, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6190,15 +5890,14 @@ define void @v_shuffle_v4p3_v4p3__7_1_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
+; GFX900-NEXT: v_mov_b32_e32 v5, v6
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6244,17 +5943,15 @@ define void @v_shuffle_v4p3_v4p3__7_2_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_2_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6302,16 +5999,15 @@ define void @v_shuffle_v4p3_v4p3__7_4_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6358,16 +6054,15 @@ define void @v_shuffle_v4p3_v4p3__7_5_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v7
-; GFX900-NEXT: v_mov_b32_e32 v6, v3
-; GFX900-NEXT: v_mov_b32_e32 v7, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6416,16 +6111,15 @@ define void @v_shuffle_v4p3_v4p3__7_6_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6472,16 +6166,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_3_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6489,16 +6182,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_3_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6506,16 +6198,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -6530,15 +6221,14 @@ define void @v_shuffle_v4p3_v4p3__7_7_u_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6546,15 +6236,14 @@ define void @v_shuffle_v4p3_v4p3__7_7_u_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6562,16 +6251,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_u_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -6586,17 +6274,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_0_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v7
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: v_mov_b32_e32 v6, v0
-; GFX900-NEXT: v_mov_b32_e32 v7, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v8
+; GFX900-NEXT: global_store_dwordx4 v9, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6606,15 +6292,13 @@ define void @v_shuffle_v4p3_v4p3__7_7_0_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v7
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: v_mov_b32_e32 v6, v0
-; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6624,15 +6308,13 @@ define void @v_shuffle_v4p3_v4p3__7_7_0_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, v7
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -6653,11 +6335,9 @@ define void @v_shuffle_v4p3_v4p3__7_7_1_3(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v7
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: v_mov_b32_e32 v6, v1
-; GFX900-NEXT: v_mov_b32_e32 v7, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v7
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6667,15 +6347,14 @@ define void @v_shuffle_v4p3_v4p3__7_7_1_3(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v7
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: v_mov_b32_e32 v6, v1
-; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6685,15 +6364,14 @@ define void @v_shuffle_v4p3_v4p3__7_7_1_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, v7
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: v_mov_b32_e32 v6, v1
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -6708,15 +6386,14 @@ define void @v_shuffle_v4p3_v4p3__7_7_2_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6724,15 +6401,14 @@ define void @v_shuffle_v4p3_v4p3__7_7_2_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6740,16 +6416,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_2_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -6764,16 +6439,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_4_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6781,16 +6455,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_4_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6798,17 +6471,16 @@ define void @v_shuffle_v4p3_v4p3__7_7_4_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -6823,16 +6495,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_5_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6840,16 +6511,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_5_3(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -6857,17 +6527,16 @@ define void @v_shuffle_v4p3_v4p3__7_7_5_3(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -6882,16 +6551,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_6_3(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v7
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: v_mov_b32_e32 v7, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -6989,12 +6657,11 @@ define void @v_shuffle_v4p3_v4p3__1_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__1_4_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v4, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7031,36 +6698,33 @@ define void @v_shuffle_v4p3_v4p3__2_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__2_4_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v4p3__2_4_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v4p3__2_4_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -7073,12 +6737,11 @@ define void @v_shuffle_v4p3_v4p3__3_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__3_4_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v4, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7141,13 +6804,12 @@ define void @v_shuffle_v4p3_v4p3__5_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7155,13 +6817,12 @@ define void @v_shuffle_v4p3_v4p3__5_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -7178,43 +6839,39 @@ define void @v_shuffle_v4p3_v4p3__6_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: v_mov_b32_e32 v3, v0
; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v4p3__6_4_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v0
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v0
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v0
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v0
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v4p3__6_4_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v9, v0
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v0
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -7228,15 +6885,14 @@ define void @v_shuffle_v4p3_v4p3__7_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_4_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7244,13 +6900,12 @@ define void @v_shuffle_v4p3_v4p3__7_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[2:3] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7258,13 +6913,12 @@ define void @v_shuffle_v4p3_v4p3__7_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[2:3] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -7278,43 +6932,39 @@ define void @v_shuffle_v4p3_v4p3__7_u_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_u_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: global_store_dwordx4 v4, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v4p3__7_u_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v3
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v0
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v1, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v1, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_u_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v3
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v9, v0
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v1, 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v1, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -7329,17 +6979,15 @@ define void @v_shuffle_v4p3_v4p3__7_0_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: v_mov_b32_e32 v4, v1
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7352,11 +7000,10 @@ define void @v_shuffle_v4p3_v4p3__7_0_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[4:5], v[0:1] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v6, v2
-; GFX90A-NEXT: v_mov_b32_e32 v7, v2
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[0:1] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7366,15 +7013,14 @@ define void @v_shuffle_v4p3_v4p3__7_0_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[4:5], v[0:1] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v2
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[0:1] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -7389,15 +7035,15 @@ define void @v_shuffle_v4p3_v4p3__7_1_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7407,14 +7053,13 @@ define void @v_shuffle_v4p3_v4p3__7_1_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7424,15 +7069,14 @@ define void @v_shuffle_v4p3_v4p3__7_1_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -7447,16 +7091,15 @@ define void @v_shuffle_v4p3_v4p3__7_2_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7464,15 +7107,14 @@ define void @v_shuffle_v4p3_v4p3__7_2_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[2:3] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[6:7] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
@@ -7481,16 +7123,15 @@ define void @v_shuffle_v4p3_v4p3__7_2_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[2:3] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[6:7] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -7506,17 +7147,15 @@ define void @v_shuffle_v4p3_v4p3__7_3_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7526,15 +7165,13 @@ define void @v_shuffle_v4p3_v4p3__7_3_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v4
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v7
+; GFX90A-NEXT: v_mov_b32_e32 v5, v4
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7544,15 +7181,13 @@ define void @v_shuffle_v4p3_v4p3__7_3_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v7
+; GFX942-NEXT: v_mov_b32_e32 v5, v4
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -7566,46 +7201,42 @@ define void @v_shuffle_v4p3_v4p3__7_5_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_5_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
; GFX900-NEXT: v_mov_b32_e32 v5, v0
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v4p3__7_5_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v3
-; GFX90A-NEXT: v_mov_b32_e32 v7, v1
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v0
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: v_mov_b32_e32 v1, v3
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_5_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v3
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v9, v0
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -7619,14 +7250,14 @@ define void @v_shuffle_v4p3_v4p3__7_6_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_6_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7634,13 +7265,12 @@ define void @v_shuffle_v4p3_v4p3__7_6_4_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[2:3] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: v_mov_b32_e32 v5, v0
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[4:5] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v3, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7648,13 +7278,12 @@ define void @v_shuffle_v4p3_v4p3__7_6_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[2:3] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[4:5] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v3, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -7668,46 +7297,42 @@ define void @v_shuffle_v4p3_v4p3__7_7_4_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_7_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v4p3__7_7_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v3
-; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v0
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v0
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v3
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v9, v0
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v0
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -7721,14 +7346,13 @@ define void @v_shuffle_v4p3_v4p3__7_7_u_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_7_u_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: global_store_dwordx4 v5, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7769,17 +7393,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_0_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: global_store_dwordx4 v9, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7787,17 +7409,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_0_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, v5
-; GFX90A-NEXT: v_mov_b32_e32 v9, v5
-; GFX90A-NEXT: v_mov_b32_e32 v10, v0
-; GFX90A-NEXT: v_mov_b32_e32 v11, v2
-; GFX90A-NEXT: global_store_dwordx4 v6, v[8:11], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v0
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7805,17 +7425,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_0_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, v0
-; GFX942-NEXT: v_mov_b32_e32 v8, v5
-; GFX942-NEXT: v_mov_b32_e32 v9, v5
-; GFX942-NEXT: v_mov_b32_e32 v11, v2
-; GFX942-NEXT: global_store_dwordx4 v6, v[8:11], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v0
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -7830,17 +7448,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_1_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v5
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: v_mov_b32_e32 v6, v2
-; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7848,16 +7464,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_1_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[2:3] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v0, v5
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[4:5], v[0:1] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7865,17 +7480,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_1_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[2:3] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v0, v5
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[4:5], v[0:1] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -7890,15 +7503,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_2_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7906,16 +7519,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_2_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: v_mov_b32_e32 v3, v4
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v0
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7923,17 +7535,16 @@ define void @v_shuffle_v4p3_v4p3__7_7_2_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: v_mov_b32_e32 v3, v4
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v0
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -7948,17 +7559,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_3_4(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -7966,16 +7575,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_3_4(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[4:5] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[4:5], v[0:1] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -7983,17 +7591,16 @@ define void @v_shuffle_v4p3_v4p3__7_7_3_4(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[4:5] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[4:5], v[0:1] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -8007,14 +7614,14 @@ define void @v_shuffle_v4p3_v4p3__7_7_5_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_7_5_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v4, v1
-; GFX900-NEXT: v_mov_b32_e32 v5, v0
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -8054,15 +7661,14 @@ define void @v_shuffle_v4p3_v4p3__7_7_6_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_7_6_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v4, v3
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: v_mov_b32_e32 v6, v2
-; GFX900-NEXT: v_mov_b32_e32 v7, v0
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: v_mov_b32_e32 v6, v0
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -8157,12 +7763,12 @@ define void @v_shuffle_v4p3_v4p3__0_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: v_mov_b32_e32 v1, v2
+; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: global_store_dwordx4 v5, v[0:3], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -8216,10 +7822,9 @@ define void @v_shuffle_v4p3_v4p3__1_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: v_mov_b32_e32 v2, v3
; GFX900-NEXT: v_mov_b32_e32 v4, v3
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -8275,10 +7880,9 @@ define void @v_shuffle_v4p3_v4p3__2_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v3, v4
; GFX900-NEXT: v_mov_b32_e32 v5, v4
-; GFX900-NEXT: v_mov_b32_e32 v6, v4
-; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -8286,16 +7890,15 @@ define void @v_shuffle_v4p3_v4p3__2_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
-; GFX90A-NEXT: v_mov_b32_e32 v6, v5
-; GFX90A-NEXT: v_mov_b32_e32 v7, v5
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v5
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -8303,16 +7906,15 @@ define void @v_shuffle_v4p3_v4p3__2_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v5
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -8330,13 +7932,12 @@ define void @v_shuffle_v4p3_v4p3__3_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v5
+; GFX900-NEXT: v_mov_b32_e32 v6, v5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v3
-; GFX900-NEXT: v_mov_b32_e32 v6, v5
-; GFX900-NEXT: v_mov_b32_e32 v7, v5
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -8433,11 +8034,11 @@ define void @v_shuffle_v4p3_v4p3__5_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v1
; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -8482,11 +8083,11 @@ define void @v_shuffle_v4p3_v4p3__6_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -8496,11 +8097,11 @@ define void @v_shuffle_v4p3_v4p3__6_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -8510,11 +8111,11 @@ define void @v_shuffle_v4p3_v4p3__6_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -8528,14 +8129,14 @@ define void @v_shuffle_v4p3_v4p3__7_5_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_5_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: v_mov_b32_e32 v6, v1
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -8577,14 +8178,13 @@ define void @v_shuffle_v4p3_v4p3__7_u_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_u_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
-; GFX900-NEXT: v_mov_b32_e32 v3, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: v_mov_b32_e32 v6, v1
+; GFX900-NEXT: global_store_dwordx4 v4, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -8592,13 +8192,12 @@ define void @v_shuffle_v4p3_v4p3__7_u_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: v_mov_b32_e32 v3, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v1, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx4 v1, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -8606,13 +8205,12 @@ define void @v_shuffle_v4p3_v4p3__7_u_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v3
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v1, 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx4 v1, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -8627,17 +8225,15 @@ define void @v_shuffle_v4p3_v4p3__7_0_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v2
-; GFX900-NEXT: v_mov_b32_e32 v6, v2
-; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: v_mov_b32_e32 v6, v1
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -8685,15 +8281,15 @@ define void @v_shuffle_v4p3_v4p3__7_1_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: v_mov_b32_e32 v6, v1
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -8741,17 +8337,15 @@ define void @v_shuffle_v4p3_v4p3__7_2_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: v_mov_b32_e32 v6, v1
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -8759,15 +8353,14 @@ define void @v_shuffle_v4p3_v4p3__7_2_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[2:3] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[6:7] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
@@ -8776,16 +8369,15 @@ define void @v_shuffle_v4p3_v4p3__7_2_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[6:7], v[2:3] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[6:7] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -8801,17 +8393,15 @@ define void @v_shuffle_v4p3_v4p3__7_3_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: v_mov_b32_e32 v6, v1
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -8821,15 +8411,13 @@ define void @v_shuffle_v4p3_v4p3__7_3_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v7
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -8839,15 +8427,13 @@ define void @v_shuffle_v4p3_v4p3__7_3_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v7
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -8861,15 +8447,14 @@ define void @v_shuffle_v4p3_v4p3__7_4_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_4_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v1
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -8877,13 +8462,12 @@ define void @v_shuffle_v4p3_v4p3__7_4_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v4, v1
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[2:3] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -8891,13 +8475,12 @@ define void @v_shuffle_v4p3_v4p3__7_4_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[0:1] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v4, v1
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[2:3] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -8926,13 +8509,12 @@ define void @v_shuffle_v4p3_v4p3__7_6_5_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[2:3] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v4, v1
-; GFX90A-NEXT: v_mov_b32_e32 v5, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[4:5] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -8940,13 +8522,12 @@ define void @v_shuffle_v4p3_v4p3__7_6_5_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[2:3] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v4, v1
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_pk_mov_b32 v[0:1], v[4:5], v[4:5] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -8960,45 +8541,42 @@ define void @v_shuffle_v4p3_v4p3__7_7_5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_7_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v1
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v4p3__7_7_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v3
-; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: v_mov_b32_e32 v8, v1
-; GFX90A-NEXT: v_mov_b32_e32 v9, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v3
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: v_mov_b32_e32 v8, v1
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -9012,13 +8590,13 @@ define void @v_shuffle_v4p3_v4p3__7_7_u_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_7_u_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v1
+; GFX900-NEXT: global_store_dwordx4 v5, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -9059,16 +8637,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_0_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: v_mov_b32_e32 v5, v0
-; GFX900-NEXT: v_mov_b32_e32 v6, v2
-; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v1
+; GFX900-NEXT: global_store_dwordx4 v9, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -9076,17 +8653,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_0_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, v5
-; GFX90A-NEXT: v_mov_b32_e32 v9, v5
-; GFX90A-NEXT: v_mov_b32_e32 v10, v0
-; GFX90A-NEXT: v_mov_b32_e32 v11, v3
-; GFX90A-NEXT: global_store_dwordx4 v6, v[8:11], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -9094,17 +8669,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_0_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, v0
-; GFX942-NEXT: v_mov_b32_e32 v8, v5
-; GFX942-NEXT: v_mov_b32_e32 v9, v5
-; GFX942-NEXT: v_mov_b32_e32 v11, v3
-; GFX942-NEXT: global_store_dwordx4 v6, v[8:11], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -9119,16 +8692,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_1_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
-; GFX900-NEXT: v_mov_b32_e32 v4, v5
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
; GFX900-NEXT: v_mov_b32_e32 v6, v1
-; GFX900-NEXT: v_mov_b32_e32 v7, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -9136,17 +8708,16 @@ define void @v_shuffle_v4p3_v4p3__7_7_1_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, v5
-; GFX90A-NEXT: v_mov_b32_e32 v9, v5
-; GFX90A-NEXT: v_mov_b32_e32 v10, v1
-; GFX90A-NEXT: v_mov_b32_e32 v11, v3
-; GFX90A-NEXT: global_store_dwordx4 v6, v[8:11], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -9154,17 +8725,16 @@ define void @v_shuffle_v4p3_v4p3__7_7_1_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, v1
-; GFX942-NEXT: v_mov_b32_e32 v8, v5
-; GFX942-NEXT: v_mov_b32_e32 v9, v5
-; GFX942-NEXT: v_mov_b32_e32 v11, v3
-; GFX942-NEXT: global_store_dwordx4 v6, v[8:11], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -9179,16 +8749,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_2_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v1
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -9196,16 +8765,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_2_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -9213,17 +8781,16 @@ define void @v_shuffle_v4p3_v4p3__7_7_2_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -9238,17 +8805,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_3_5(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v1
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -9256,17 +8821,16 @@ define void @v_shuffle_v4p3_v4p3__7_7_3_5(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -9274,17 +8838,16 @@ define void @v_shuffle_v4p3_v4p3__7_7_3_5(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -9298,45 +8861,42 @@ define void @v_shuffle_v4p3_v4p3__7_7_4_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_7_4_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v4, v0
-; GFX900-NEXT: v_mov_b32_e32 v5, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: v_mov_b32_e32 v6, v1
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v4p3__7_7_4_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v3
-; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v1
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_4_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v3
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -9350,46 +8910,42 @@ define void @v_shuffle_v4p3_v4p3__7_7_6_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_7_6_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v4, v3
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: v_mov_b32_e32 v6, v2
-; GFX900-NEXT: v_mov_b32_e32 v7, v1
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: v_mov_b32_e32 v6, v1
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v4p3__7_7_6_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v3
-; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: v_mov_b32_e32 v8, v2
-; GFX90A-NEXT: v_mov_b32_e32 v9, v1
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: v_mov_b32_e32 v1, v5
+; GFX90A-NEXT: v_mov_b32_e32 v2, v4
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_6_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v3
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: v_mov_b32_e32 v8, v2
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: v_mov_b32_e32 v1, v5
+; GFX942-NEXT: v_mov_b32_e32 v2, v4
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -9406,10 +8962,10 @@ define void @v_shuffle_v4p3_v4p3__u_6_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -9514,10 +9070,9 @@ define void @v_shuffle_v4p3_v4p3__1_6_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: v_mov_b32_e32 v2, v4
; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: v_mov_b32_e32 v5, v4
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -9571,10 +9126,9 @@ define void @v_shuffle_v4p3_v4p3__2_6_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v3, v5
; GFX900-NEXT: v_mov_b32_e32 v4, v5
-; GFX900-NEXT: v_mov_b32_e32 v6, v5
-; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -9582,16 +9136,16 @@ define void @v_shuffle_v4p3_v4p3__2_6_6_6(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v6
+; GFX90A-NEXT: v_mov_b32_e32 v4, v6
; GFX90A-NEXT: v_mov_b32_e32 v5, v6
-; GFX90A-NEXT: v_mov_b32_e32 v7, v6
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -9599,16 +9153,16 @@ define void @v_shuffle_v4p3_v4p3__2_6_6_6(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v6
+; GFX942-NEXT: v_mov_b32_e32 v4, v6
; GFX942-NEXT: v_mov_b32_e32 v5, v6
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v6
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -9626,13 +9180,12 @@ define void @v_shuffle_v4p3_v4p3__3_6_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v6
+; GFX900-NEXT: v_mov_b32_e32 v5, v6
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v3
-; GFX900-NEXT: v_mov_b32_e32 v5, v6
-; GFX900-NEXT: v_mov_b32_e32 v7, v6
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -9728,11 +9281,10 @@ define void @v_shuffle_v4p3_v4p3__5_6_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -9775,11 +9327,11 @@ define void @v_shuffle_v4p3_v4p3__6_6_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -9789,11 +9341,11 @@ define void @v_shuffle_v4p3_v4p3__6_6_6_6(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v2
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: v_mov_b32_e32 v3, v2
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v5, v2
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -9803,11 +9355,11 @@ define void @v_shuffle_v4p3_v4p3__6_6_6_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v2
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: v_mov_b32_e32 v3, v2
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v2
+; GFX942-NEXT: v_mov_b32_e32 v5, v2
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -9821,14 +9373,14 @@ define void @v_shuffle_v4p3_v4p3__7_6_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_6_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -9868,13 +9420,13 @@ define void @v_shuffle_v4p3_v4p3__7_u_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_u_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: global_store_dwordx4 v4, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -9915,16 +9467,15 @@ define void @v_shuffle_v4p3_v4p3__7_0_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v4, v3
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -9972,16 +9523,15 @@ define void @v_shuffle_v4p3_v4p3__7_1_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -10031,15 +9581,14 @@ define void @v_shuffle_v4p3_v4p3__7_2_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v6
-; GFX900-NEXT: v_mov_b32_e32 v4, v2
-; GFX900-NEXT: v_mov_b32_e32 v6, v5
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
@@ -10088,16 +9637,15 @@ define void @v_shuffle_v4p3_v4p3__7_3_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v7
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: v_mov_b32_e32 v7, v6
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -10105,16 +9653,16 @@ define void @v_shuffle_v4p3_v4p3__7_3_6_6(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v7
-; GFX90A-NEXT: v_mov_b32_e32 v5, v3
-; GFX90A-NEXT: v_mov_b32_e32 v7, v6
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v7
+; GFX90A-NEXT: v_mov_b32_e32 v4, v6
+; GFX90A-NEXT: v_mov_b32_e32 v5, v6
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -10122,16 +9670,16 @@ define void @v_shuffle_v4p3_v4p3__7_3_6_6(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, v7
-; GFX942-NEXT: v_mov_b32_e32 v5, v3
-; GFX942-NEXT: v_mov_b32_e32 v7, v6
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v7
+; GFX942-NEXT: v_mov_b32_e32 v4, v6
+; GFX942-NEXT: v_mov_b32_e32 v5, v6
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -10192,13 +9740,14 @@ define void @v_shuffle_v4p3_v4p3__7_5_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_5_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -10238,14 +9787,14 @@ define void @v_shuffle_v4p3_v4p3__7_7_6_6(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_7_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -10287,14 +9836,13 @@ define void @v_shuffle_v4p3_v4p3__7_7_u_6(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_7_u_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: global_store_dwordx4 v5, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -10337,17 +9885,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_0_6(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, v4
-; GFX900-NEXT: v_mov_b32_e32 v6, v4
-; GFX900-NEXT: v_mov_b32_e32 v7, v0
-; GFX900-NEXT: v_mov_b32_e32 v8, v3
-; GFX900-NEXT: global_store_dwordx4 v9, v[5:8], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: global_store_dwordx4 v9, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -10355,17 +9901,16 @@ define void @v_shuffle_v4p3_v4p3__7_7_0_6(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, v5
-; GFX90A-NEXT: v_mov_b32_e32 v9, v5
-; GFX90A-NEXT: v_mov_b32_e32 v10, v0
-; GFX90A-NEXT: v_mov_b32_e32 v11, v4
-; GFX90A-NEXT: global_store_dwordx4 v6, v[8:11], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v0, v7
+; GFX90A-NEXT: v_mov_b32_e32 v1, v7
+; GFX90A-NEXT: v_mov_b32_e32 v3, v6
+; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -10373,17 +9918,17 @@ define void @v_shuffle_v4p3_v4p3__7_7_0_6(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, v0
-; GFX942-NEXT: v_mov_b32_e32 v8, v5
-; GFX942-NEXT: v_mov_b32_e32 v9, v5
-; GFX942-NEXT: v_mov_b32_e32 v11, v4
-; GFX942-NEXT: global_store_dwordx4 v6, v[8:11], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v7
+; GFX942-NEXT: v_mov_b32_e32 v1, v7
+; GFX942-NEXT: v_mov_b32_e32 v3, v6
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -10398,17 +9943,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_1_6(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v10, 0
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, v5
-; GFX900-NEXT: v_mov_b32_e32 v7, v5
-; GFX900-NEXT: v_mov_b32_e32 v8, v1
-; GFX900-NEXT: v_mov_b32_e32 v9, v4
-; GFX900-NEXT: global_store_dwordx4 v10, v[6:9], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -10416,16 +9959,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_1_6(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[4:5] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v0, v5
-; GFX90A-NEXT: v_mov_b32_e32 v1, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[4:5], v[2:3] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -10433,17 +9975,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_1_6(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[4:5] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v0, v5
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[4:5], v[2:3] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -10458,16 +9998,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_2_6(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -10517,17 +10056,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_3_6(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v0, v7
-; GFX900-NEXT: v_mov_b32_e32 v1, v7
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: v_mov_b32_e32 v3, v6
-; GFX900-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -10535,16 +10072,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_3_6(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[6:7] op_sel:[1,0]
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[4:5], v[2:3] op_sel:[1,0]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -10552,17 +10088,16 @@ define void @v_shuffle_v4p3_v4p3__7_7_3_6(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_pk_mov_b32 v[2:3], v[2:3], v[6:7] op_sel:[1,0]
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_pk_mov_b32 v[4:5], v[4:5], v[2:3] op_sel:[1,0]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -10576,46 +10111,42 @@ define void @v_shuffle_v4p3_v4p3__7_7_4_6(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_7_4_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v4, v3
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: v_mov_b32_e32 v6, v0
-; GFX900-NEXT: v_mov_b32_e32 v7, v2
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v4p3__7_7_4_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v3
-; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v2
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v0, v5
+; GFX90A-NEXT: v_mov_b32_e32 v1, v5
+; GFX90A-NEXT: v_mov_b32_e32 v3, v4
+; GFX90A-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_4_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v3
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v9, v2
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v0, v5
+; GFX942-NEXT: v_mov_b32_e32 v1, v5
+; GFX942-NEXT: v_mov_b32_e32 v3, v4
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -10629,15 +10160,14 @@ define void @v_shuffle_v4p3_v4p3__7_7_5_6(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_7_5_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v4, v3
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: v_mov_b32_e32 v6, v1
-; GFX900-NEXT: v_mov_b32_e32 v7, v2
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: v_mov_b32_e32 v6, v2
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -10677,39 +10207,40 @@ define void @v_shuffle_v4p3_v4p3__u_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__u_7_7_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v4p3__u_7_7_7:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v4p3__u_7_7_7:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -10789,10 +10320,10 @@ define void @v_shuffle_v4p3_v4p3__1_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v1
+; GFX900-NEXT: v_mov_b32_e32 v2, v5
; GFX900-NEXT: v_mov_b32_e32 v3, v5
; GFX900-NEXT: v_mov_b32_e32 v4, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v6, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -10800,16 +10331,16 @@ define void @v_shuffle_v4p3_v4p3__1_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v1
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v5
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v5
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -10817,17 +10348,16 @@ define void @v_shuffle_v4p3_v4p3__1_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v1
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v5
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v5
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -10848,10 +10378,10 @@ define void @v_shuffle_v4p3_v4p3__2_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v2
+; GFX900-NEXT: v_mov_b32_e32 v3, v6
; GFX900-NEXT: v_mov_b32_e32 v4, v6
; GFX900-NEXT: v_mov_b32_e32 v5, v6
-; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v7, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -10859,16 +10389,16 @@ define void @v_shuffle_v4p3_v4p3__2_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v2
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v3, v7
+; GFX90A-NEXT: v_mov_b32_e32 v4, v7
; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: v_mov_b32_e32 v6, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -10876,16 +10406,16 @@ define void @v_shuffle_v4p3_v4p3__2_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v3, v7
+; GFX942-NEXT: v_mov_b32_e32 v4, v7
; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v6, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -10903,13 +10433,13 @@ define void @v_shuffle_v4p3_v4p3__3_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v4, v7
+; GFX900-NEXT: v_mov_b32_e32 v5, v7
+; GFX900-NEXT: v_mov_b32_e32 v6, v7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v3
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
-; GFX900-NEXT: v_mov_b32_e32 v6, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -10917,16 +10447,16 @@ define void @v_shuffle_v4p3_v4p3__3_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v5
; GFX90A-NEXT: v_mov_b32_e32 v4, v3
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: v_mov_b32_e32 v6, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -10934,16 +10464,17 @@ define void @v_shuffle_v4p3_v4p3__3_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v5
; GFX942-NEXT: v_mov_b32_e32 v4, v3
-; GFX942-NEXT: v_mov_b32_e32 v6, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -11006,11 +10537,10 @@ define void @v_shuffle_v4p3_v4p3__5_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v1
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -11020,11 +10550,11 @@ define void @v_shuffle_v4p3_v4p3__5_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v1
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v1
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -11034,11 +10564,11 @@ define void @v_shuffle_v4p3_v4p3__5_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v1
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v1
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -11052,42 +10582,40 @@ define void @v_shuffle_v4p3_v4p3__6_7_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__6_7_7_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v6, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v2
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v4p3__6_7_7_7:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v2
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: v_mov_b32_e32 v2, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v4, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v4p3__6_7_7_7:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: v_mov_b32_e32 v2, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_mov_b32_e32 v4, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -11101,13 +10629,13 @@ define void @v_shuffle_v4p3_v4p3__7_u_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_u_7_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v4, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v3
+; GFX900-NEXT: global_store_dwordx4 v4, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -11148,16 +10676,15 @@ define void @v_shuffle_v4p3_v4p3__7_0_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v0
-; GFX900-NEXT: v_mov_b32_e32 v3, v4
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -11205,16 +10732,15 @@ define void @v_shuffle_v4p3_v4p3__7_1_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v5
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: global_store_dwordx4 v6, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v3
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -11264,15 +10790,14 @@ define void @v_shuffle_v4p3_v4p3__7_2_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[3:6]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v3, v6
-; GFX900-NEXT: v_mov_b32_e32 v4, v2
-; GFX900-NEXT: v_mov_b32_e32 v5, v6
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v3
; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
@@ -11321,16 +10846,15 @@ define void @v_shuffle_v4p3_v4p3__7_3_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[1:4]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v7
; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: v_mov_b32_e32 v6, v7
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v6, v3
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -11338,16 +10862,16 @@ define void @v_shuffle_v4p3_v4p3__7_3_7_7(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v7
; GFX90A-NEXT: v_mov_b32_e32 v4, v7
-; GFX90A-NEXT: v_mov_b32_e32 v5, v3
-; GFX90A-NEXT: v_mov_b32_e32 v6, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v5, v7
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -11355,16 +10879,16 @@ define void @v_shuffle_v4p3_v4p3__7_3_7_7(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v7
; GFX942-NEXT: v_mov_b32_e32 v4, v7
-; GFX942-NEXT: v_mov_b32_e32 v5, v3
-; GFX942-NEXT: v_mov_b32_e32 v6, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v5, v7
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -11378,15 +10902,14 @@ define void @v_shuffle_v4p3_v4p3__7_4_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_4_7_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v3
-; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: v_mov_b32_e32 v4, v0
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
; GFX900-NEXT: v_mov_b32_e32 v6, v3
-; GFX900-NEXT: v_mov_b32_e32 v7, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -11426,13 +10949,14 @@ define void @v_shuffle_v4p3_v4p3__7_5_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_5_7_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v1
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v3
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -11472,14 +10996,14 @@ define void @v_shuffle_v4p3_v4p3__7_6_7_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_6_7_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v2
-; GFX900-NEXT: v_mov_b32_e32 v2, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v2
+; GFX900-NEXT: v_mov_b32_e32 v5, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v3
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -11519,13 +11043,13 @@ define void @v_shuffle_v4p3_v4p3__7_7_u_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_7_u_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v3
+; GFX900-NEXT: global_store_dwordx4 v5, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -11536,9 +11060,9 @@ define void @v_shuffle_v4p3_v4p3__7_7_u_7(ptr addrspace(1) inreg %ptr) {
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: v_mov_b32_e32 v4, 0
-; GFX90A-NEXT: v_mov_b32_e32 v0, v3
-; GFX90A-NEXT: v_mov_b32_e32 v1, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v4, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -11549,9 +11073,9 @@ define void @v_shuffle_v4p3_v4p3__7_7_u_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v3
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v4, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -11566,16 +11090,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_0_7(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[5:8]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v9, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[1:4]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: v_mov_b32_e32 v1, v4
-; GFX900-NEXT: v_mov_b32_e32 v2, v4
-; GFX900-NEXT: v_mov_b32_e32 v3, v0
-; GFX900-NEXT: global_store_dwordx4 v5, v[1:4], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v3
+; GFX900-NEXT: global_store_dwordx4 v9, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -11583,16 +11106,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_0_7(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v0
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -11600,17 +11122,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_0_7(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -11625,16 +11145,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_1_7(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ; def v[4:7]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v8, 0
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[2:5]
+; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v6, 0
-; GFX900-NEXT: v_mov_b32_e32 v2, v5
-; GFX900-NEXT: v_mov_b32_e32 v3, v5
-; GFX900-NEXT: v_mov_b32_e32 v4, v1
-; GFX900-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v3
+; GFX900-NEXT: global_store_dwordx4 v8, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -11642,16 +11161,16 @@ define void @v_shuffle_v4p3_v4p3__7_7_1_7(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[2:5]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, 0
-; GFX90A-NEXT: v_mov_b32_e32 v2, v5
-; GFX90A-NEXT: v_mov_b32_e32 v3, v5
-; GFX90A-NEXT: v_mov_b32_e32 v4, v1
-; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v8, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v8, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -11659,17 +11178,16 @@ define void @v_shuffle_v4p3_v4p3__7_7_1_7(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[2:5]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v5
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v1
-; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -11684,16 +11202,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_2_7(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[0:3]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[3:6]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v7, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v6
-; GFX900-NEXT: v_mov_b32_e32 v1, v6
-; GFX900-NEXT: v_mov_b32_e32 v3, v6
-; GFX900-NEXT: global_store_dwordx4 v7, v[0:3], s[16:17]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def v[0:3]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v6, v3
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -11701,16 +11218,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_2_7(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[0:3]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v0, v7
-; GFX90A-NEXT: v_mov_b32_e32 v1, v7
-; GFX90A-NEXT: v_mov_b32_e32 v3, v7
-; GFX90A-NEXT: global_store_dwordx4 v8, v[0:3], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -11718,17 +11234,16 @@ define void @v_shuffle_v4p3_v4p3__7_7_2_7(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[0:3]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v7
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -11743,16 +11258,15 @@ define void @v_shuffle_v4p3_v4p3__7_7_3_7(ptr addrspace(1) inreg %ptr) {
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def v[4:7]
+; GFX900-NEXT: ; def v[2:5]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, v7
-; GFX900-NEXT: v_mov_b32_e32 v5, v7
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
; GFX900-NEXT: v_mov_b32_e32 v6, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -11760,16 +11274,16 @@ define void @v_shuffle_v4p3_v4p3__7_7_3_7(ptr addrspace(1) inreg %ptr) {
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def v[4:7]
+; GFX90A-NEXT: ; def v[2:5]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v8, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v4, v7
-; GFX90A-NEXT: v_mov_b32_e32 v5, v7
-; GFX90A-NEXT: v_mov_b32_e32 v6, v3
-; GFX90A-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v5
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
@@ -11777,16 +11291,16 @@ define void @v_shuffle_v4p3_v4p3__7_7_3_7(ptr addrspace(1) inreg %ptr) {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def v[4:7]
+; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, v7
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
-; GFX942-NEXT: v_mov_b32_e32 v6, v3
-; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v4, v5
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -11800,46 +11314,42 @@ define void @v_shuffle_v4p3_v4p3__7_7_4_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_7_4_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v4, v3
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: v_mov_b32_e32 v6, v0
-; GFX900-NEXT: v_mov_b32_e32 v7, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v0
+; GFX900-NEXT: v_mov_b32_e32 v6, v3
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v4p3__7_7_4_7:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v3
-; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: v_mov_b32_e32 v8, v0
-; GFX90A-NEXT: v_mov_b32_e32 v9, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v0
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_4_7:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v3
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v9, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v0
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -11853,46 +11363,42 @@ define void @v_shuffle_v4p3_v4p3__7_7_5_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_7_5_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_mov_b32_e32 v8, 0
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_mov_b32_e32 v4, v3
-; GFX900-NEXT: v_mov_b32_e32 v5, v3
-; GFX900-NEXT: v_mov_b32_e32 v6, v1
-; GFX900-NEXT: v_mov_b32_e32 v7, v3
-; GFX900-NEXT: global_store_dwordx4 v8, v[4:7], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v5, v1
+; GFX900-NEXT: v_mov_b32_e32 v6, v3
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v4p3_v4p3__7_7_5_7:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, 0
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def v[0:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_mov_b32_e32 v6, v3
-; GFX90A-NEXT: v_mov_b32_e32 v7, v3
-; GFX90A-NEXT: v_mov_b32_e32 v8, v1
-; GFX90A-NEXT: v_mov_b32_e32 v9, v3
-; GFX90A-NEXT: global_store_dwordx4 v4, v[6:9], s[16:17]
+; GFX90A-NEXT: v_mov_b32_e32 v6, 0
+; GFX90A-NEXT: v_mov_b32_e32 v2, v3
+; GFX90A-NEXT: v_mov_b32_e32 v4, v1
+; GFX90A-NEXT: v_mov_b32_e32 v5, v3
+; GFX90A-NEXT: global_store_dwordx4 v6, v[2:5], s[16:17]
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX942-LABEL: v_shuffle_v4p3_v4p3__7_7_5_7:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v3
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
-; GFX942-NEXT: v_mov_b32_e32 v8, v1
-; GFX942-NEXT: v_mov_b32_e32 v9, v3
-; GFX942-NEXT: global_store_dwordx4 v4, v[6:9], s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: v_mov_b32_e32 v2, v3
+; GFX942-NEXT: v_mov_b32_e32 v4, v1
+; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr addrspace(3)> asm "; def $0", "=v"()
@@ -11906,13 +11412,14 @@ define void @v_shuffle_v4p3_v4p3__7_7_6_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v4p3_v4p3__7_7_6_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v7, 0
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def v[0:3]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: v_mov_b32_e32 v4, 0
-; GFX900-NEXT: v_mov_b32_e32 v0, v3
-; GFX900-NEXT: v_mov_b32_e32 v1, v3
-; GFX900-NEXT: global_store_dwordx4 v4, v[0:3], s[16:17]
+; GFX900-NEXT: v_mov_b32_e32 v4, v3
+; GFX900-NEXT: v_mov_b32_e32 v5, v2
+; GFX900-NEXT: v_mov_b32_e32 v6, v3
+; GFX900-NEXT: global_store_dwordx4 v7, v[3:6], s[16:17]
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
diff --git a/llvm/test/CodeGen/AMDGPU/vector-legalizer-divergence.ll b/llvm/test/CodeGen/AMDGPU/vector-legalizer-divergence.ll
index bb0b661e800c3..8c634934947a4 100644
--- a/llvm/test/CodeGen/AMDGPU/vector-legalizer-divergence.ll
+++ b/llvm/test/CodeGen/AMDGPU/vector-legalizer-divergence.ll
@@ -10,18 +10,17 @@ define amdgpu_kernel void @spam(ptr addrspace(1) noalias %arg) {
; CHECK-LABEL: spam:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
-; CHECK-NEXT: v_lshlrev_b32_e32 v4, 3, v0
-; CHECK-NEXT: v_mov_b32_e32 v5, 0
+; CHECK-NEXT: v_lshlrev_b32_e32 v0, 3, v0
+; CHECK-NEXT: v_mov_b32_e32 v1, 0
; CHECK-NEXT: s_mov_b32 s3, 0xf000
; CHECK-NEXT: s_mov_b32 s2, 0
-; CHECK-NEXT: v_mov_b32_e32 v1, 0x7ff80000
-; CHECK-NEXT: v_mov_b32_e32 v0, v5
+; CHECK-NEXT: v_mov_b32_e32 v2, 0x7ff80000
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
-; CHECK-NEXT: buffer_store_dwordx4 v[0:3], v[4:5], s[0:3], 0 addr64 offset:16
+; CHECK-NEXT: buffer_store_dwordx4 v[1:4], v[0:1], s[0:3], 0 addr64 offset:16
; CHECK-NEXT: s_waitcnt expcnt(0)
-; CHECK-NEXT: v_mov_b32_e32 v2, v5
-; CHECK-NEXT: v_mov_b32_e32 v3, v5
-; CHECK-NEXT: buffer_store_dwordx4 v[0:3], v[4:5], s[0:3], 0 addr64 offset:48
+; CHECK-NEXT: v_mov_b32_e32 v3, v1
+; CHECK-NEXT: v_mov_b32_e32 v4, v1
+; CHECK-NEXT: buffer_store_dwordx4 v[1:4], v[0:1], s[0:3], 0 addr64 offset:48
; CHECK-NEXT: s_endpgm
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = zext i32 %tmp to i64
diff --git a/llvm/test/CodeGen/AMDGPU/widen-vselect-and-mask.ll b/llvm/test/CodeGen/AMDGPU/widen-vselect-and-mask.ll
index 1a8f198ecf70a..69f6c38d55a2d 100644
--- a/llvm/test/CodeGen/AMDGPU/widen-vselect-and-mask.ll
+++ b/llvm/test/CodeGen/AMDGPU/widen-vselect-and-mask.ll
@@ -54,27 +54,24 @@ define amdgpu_kernel void @widen_vselect_and_mask_v4i64(<4 x i64> %arg) #0 {
; GCN-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[4:5], 0
-; GCN-NEXT: v_mov_b32_e32 v0, 0
+; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_mov_b64 s[8:9], 16
; GCN-NEXT: s_mov_b32 s11, 0xf000
; GCN-NEXT: s_mov_b32 s10, -1
-; GCN-NEXT: v_mov_b32_e32 v1, v0
-; GCN-NEXT: v_mov_b32_e32 v2, v0
-; GCN-NEXT: v_mov_b32_e32 v3, v0
+; GCN-NEXT: v_mov_b32_e32 v2, v1
+; GCN-NEXT: v_mov_b32_e32 v3, v1
+; GCN-NEXT: v_mov_b32_e32 v4, v1
; GCN-NEXT: v_cmp_eq_u64_e64 s[2:3], s[0:1], 0
-; GCN-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[2:3]
+; GCN-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[2:3]
; GCN-NEXT: v_cmp_ne_u64_e64 s[0:1], s[0:1], 0
-; GCN-NEXT: v_mov_b32_e32 v5, v4
-; GCN-NEXT: v_cmp_lt_i64_e32 vcc, -1, v[4:5]
+; GCN-NEXT: v_mov_b32_e32 v6, v5
+; GCN-NEXT: v_cmp_lt_i64_e32 vcc, -1, v[5:6]
; GCN-NEXT: s_and_b64 s[0:1], vcc, s[0:1]
-; GCN-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[0:1]
-; GCN-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0
-; GCN-NEXT: v_mov_b32_e32 v5, v0
-; GCN-NEXT: v_mov_b32_e32 v6, v0
-; GCN-NEXT: v_mov_b32_e32 v7, v0
+; GCN-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
+; GCN-NEXT: buffer_store_dwordx4 v[1:4], off, s[8:11], 0
; GCN-NEXT: s_mov_b32 s6, s10
; GCN-NEXT: s_mov_b32 s7, s11
-; GCN-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0
+; GCN-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; GCN-NEXT: s_endpgm
bb:
%tmp = extractelement <4 x i64> %arg, i64 0
More information about the llvm-commits
mailing list